Compare commits

...

326 Commits

Author SHA1 Message Date
Emmanuel Marty
15ee2dfe11
Bump version 2023-02-27 18:23:00 +01:00
Emmanuel Marty
35ec6d73da
Optimal LZSA1 compression 2023-02-27 08:26:42 +01:00
Emmanuel Marty
6b08bc3990
Update README 2023-02-13 10:37:25 +01:00
Emmanuel Marty
9350d977bf
Add consts 2023-02-10 17:08:03 +01:00
Emmanuel Marty
82f03b55e3
Faster LZSA1 compression 2023-02-02 11:11:14 +01:00
Emmanuel Marty
583e4db62e
Small improvements 2023-01-30 13:21:43 +01:00
Emmanuel Marty
398885a52d
Small simplifications in matchfinder 2023-01-30 13:19:03 +01:00
Emmanuel Marty
21a0dc70c8
Fix CppCheck warnings 2023-01-30 13:17:30 +01:00
Emmanuel Marty
8cea101625
Bump version 2022-10-21 13:59:48 +02:00
Emmanuel Marty
b86ccf8f7b
Small LZSA1 improvements; remove unneeded tests 2022-10-20 17:16:34 +02:00
Emmanuel Marty
185ea0cbf2
Compress LZSA1 another 25% faster; minor cleanup 2022-10-19 10:39:40 +02:00
Emmanuel Marty
ed81dd69df
Fix C99 warning 2022-10-18 07:56:51 +02:00
Emmanuel Marty
bea90736d5
Avoid forward declarations 2022-10-17 18:37:06 +02:00
Emmanuel Marty
3eaf926c1a
Fix some documentation comments 2022-10-17 09:43:56 +02:00
Emmanuel Marty
1bca5b995a
Add documentation comments 2022-10-17 09:42:39 +02:00
Emmanuel Marty
5484395465
Add more missing constants; more minor cleanup 2022-10-16 18:39:24 +02:00
Emmanuel Marty
34ed06abfb
Add missing consts; remove unneeded code; clean up 2022-10-15 12:10:41 +02:00
Emmanuel Marty
930383a18a
Bump version 2022-05-04 15:03:32 +02:00
Emmanuel Marty
eeec526eeb
Compress LZSA1 another 35% faster 2022-05-04 11:32:21 +02:00
Emmanuel Marty
613f3ef0d7
Small cleanup 2022-04-28 08:02:27 +02:00
Emmanuel Marty
38bfea7ecf
Small LZSA2 improvement 2022-04-20 14:06:19 +02:00
Emmanuel Marty
a5f3691d4f
Small cleanup; compress LZSA2 another 1% faster 2022-04-19 09:18:09 +02:00
Emmanuel Marty
55101e8ac7
Merge pull request #67 from emmanuel-marty/spke/lzsa2_z80_fast
New fast decompressor for LZSA2 (-6 bytes, +1% speed)
2022-04-08 16:18:39 +02:00
specke
b1a87b55da New fast decompressor for LZSA2 (-6 bytes, +1% speed) 2022-04-08 13:34:56 +01:00
Emmanuel Marty
120bd71ed9
Bump version 2022-04-06 10:33:31 +02:00
Emmanuel Marty
86b7fe0738
Merge pull request #66 from emmanuel-marty/spke/new_z80_small
New small decompressor for LZSA2 (-5 bytes, +1% speed)
2022-04-05 18:22:18 +02:00
specke
efc19e9d93 New small decompressor for LZSA2 (-5 bytes, +1% speed) 2022-04-05 11:41:52 +01:00
Emmanuel Marty
ae942d4eec
Small LZSA2 speedup in forward arrivals parser 2022-04-05 07:13:03 +02:00
Emmanuel Marty
81fd3af0b3
Small improvement 2022-04-04 13:58:00 +02:00
Emmanuel Marty
34de880080
Compress LZSA2 a bit faster again; small cleanup 2022-04-03 20:23:55 +02:00
Emmanuel Marty
9e11c0893a
Pack LZSA1 35% faster, and LZSA2 another 5% faster 2022-04-02 08:49:26 +02:00
Emmanuel Marty
acfa11d733
Bump version 2022-01-02 07:52:10 +01:00
Emmanuel Marty
6b3dff18a5
Compress ~5% faster; small ratio increase 2022-01-02 07:15:23 +01:00
Emmanuel Marty
9a8a04f9b2
Bump version 2021-11-30 10:54:48 +01:00
Emmanuel Marty
79fadb350e
Tiny token reduction, ratio increase for some data 2021-11-28 10:01:11 +01:00
Emmanuel Marty
61fb2b881e
Tiny LZSA1 ratio increase 2021-11-26 15:41:50 +01:00
Emmanuel Marty
6389895e41
Free, tiny LZSA2 ratio increase for some files 2021-11-26 11:24:44 +01:00
Emmanuel Marty
a48db51134
Merge pull request #64 from jbrandwood/master
Fix reorganized 6502 decompress_faster. Approx 3-4% faster, LZSA2 add…
2021-11-23 12:16:28 +01:00
John Brandwood
4b046625e6 Fix reorganized 6502 decompress_faster. Approx 3-4% faster, LZSA2 adds 1 byte. 2021-11-22 17:02:37 -05:00
Emmanuel Marty
978c44eca7
Revert 2021-11-21 18:03:03 +01:00
Emmanuel Marty
bed1006a6d
Merge pull request #63 from jbrandwood/master
Make 6502 decompress_faster_v2.asm (& v1) 3-4% faster at the cost of 1 byte extra.
2021-11-21 12:14:22 +01:00
John Brandwood
7610a965a5 Make 6502 decompress_faster_v2.asm (& v1) 3-4% faster at the cost of 1 byte. 2021-11-20 15:10:56 -05:00
Emmanuel Marty
4e59375048
Bump version 2021-10-13 11:49:49 +02:00
Emmanuel Marty
854c03bd53
Another small LZSA2 speedup 2021-10-12 21:02:16 +02:00
Emmanuel Marty
6aee0031ed
Increase LZSA2 ratio a little further 2021-10-12 15:52:11 +02:00
Emmanuel Marty
bb1b4fda14
Compress LZSA2 a bit faster again 2021-10-10 07:52:03 +02:00
Emmanuel Marty
c6a93601cf
Compress a little faster again 2021-10-05 12:11:39 +02:00
Emmanuel Marty
f665e8307d
Merge pull request #62 from Mistranger/master
Allow piping files to stdin and stdout.
2021-09-10 12:42:19 +02:00
cybermind
c1a2e9a82c Allow piping files to stdin and stdout 2021-07-26 22:06:54 +05:00
Emmanuel Marty
c0259a77b4
Bump version 2021-06-06 11:40:58 +02:00
Emmanuel Marty
5e404e93d1
Small LZSA2 ratio increase for some files 2021-06-04 19:11:22 +02:00
Emmanuel Marty
65d6972f2c
Add link to streamed LZSA2 depacker for 8088 2021-04-30 14:48:23 +02:00
Emmanuel Marty
bbf782ced8
Update README 2021-04-30 14:42:43 +02:00
Emmanuel Marty
5cfec00d87
Add Apple II+/IIe demo link to README 2021-04-15 13:21:26 +02:00
Emmanuel Marty
48f64a1d20
Merge pull request #60 from specke/master
5% faster decompressor for LZSA1 on Z80
2021-04-08 09:17:17 +02:00
introspec
e9a85e92dc
Minor edit
Make returned values more predictable
2021-04-07 19:54:23 +01:00
introspec
004db30296
Improved "fast" Z80 decompressor for LZSA1
113(+4) bytes, +5% speed
2021-04-07 19:50:12 +01:00
introspec
4eae728e56
Merge pull request #7 from emmanuel-marty/master
Synchronize with the head repository
2021-04-07 19:47:08 +01:00
Emmanuel Marty
42aad36b4d
Merge pull request #59 from jbrandwood/master
Reorganize 6502 decompress_faster depackers for smaller size and grea…
2021-03-26 14:22:42 +01:00
John Brandwood
03fd8751b0 Reorganize 6502 decompress_faster depackers for smaller size and greater speed. 2021-03-25 19:01:35 -04:00
Emmanuel Marty
95a189b1ed
Merge pull request #58 from specke/master
Added a good game that uses LZSA
2021-03-24 13:47:26 +01:00
introspec
be29323516
Update README.md
Added Marsmare: Alienation
2021-03-24 10:35:19 +00:00
introspec
9cd6c554c5
Merge pull request #6 from emmanuel-marty/master
Synchronize with the head
2021-03-24 10:30:25 +00:00
Emmanuel Marty
8075b5ab68
Fix #54 (LZSA2 spec typo) reported by remy-luisant 2021-01-13 11:30:17 +01:00
Emmanuel Marty
488c288d8f
Bump version 2020-12-17 16:27:58 +01:00
Emmanuel Marty
e5538544d5
Another small LZSA2 compression speedup 2020-12-17 14:47:25 +01:00
Emmanuel Marty
6bebfb75bd
Compress LZSA2 a little bit faster 2020-12-16 10:03:18 +01:00
Emmanuel Marty
71493cf889
Keep debug info when building executable 2020-12-16 10:02:09 +01:00
Emmanuel Marty
4402f1b2b8
Update README 2020-12-07 10:13:44 +01:00
Emmanuel Marty
d1f067ef15
Don't create empty directory in Makefile 2020-11-25 18:21:53 +01:00
Emmanuel Marty
710257970e
Merge pull request #51 from dougmasten/dev
Space optimizations for backward HD6309 depackers
2020-11-22 12:36:00 +01:00
Doug Masten
8689a42ff9 Save 10 bytes for backward HD6309 LZSA2 depacker 2020-11-20 16:55:53 -06:00
Doug Masten
4d20175edd Save 8 bytes for backward HD6309 LZSA1 depacker 2020-11-20 16:14:18 -06:00
Emmanuel Marty
c304c03978
Bump version 2020-11-07 09:39:29 +01:00
Emmanuel Marty
d85fc4e034
Save 1 byte/2 cycles for fast 6502 LZSA2 depacker 2020-11-07 08:41:31 +01:00
Emmanuel Marty
7ea6b63db8
Save 1 byte/2 cycles for small 6502 LZSA2 depacker 2020-11-07 00:30:35 +01:00
Emmanuel Marty
b98f8410fe
Remove unneeded code 2020-11-06 16:26:37 +01:00
Emmanuel Marty
fc927c783c
Small LZSA2 compression speedup 2020-11-02 15:24:08 +01:00
Emmanuel Marty
34715950fd
Add backward depackers for HD6309 2020-11-01 22:57:50 +01:00
Emmanuel Marty
e00eb2bd62
Update README 2020-10-19 15:07:14 +02:00
Emmanuel Marty
27b04e8bd4
Merge pull request #50 from dougmasten/dev
Optimization to 6809 depacker and added new H6309 version
2020-10-18 13:54:25 +02:00
Emmanuel Marty
fd36bf0545
Clean up 2020-10-13 16:32:58 +02:00
Emmanuel Marty
4fadd7bbef
Clean up 2020-10-13 16:31:30 +02:00
Emmanuel Marty
441ea21d4f
Compress LZSA2 a little faster 2020-10-13 16:10:18 +02:00
Emmanuel Marty
b979d29eb1
Don't use bool in command-line tool (C99 only) 2020-10-13 16:09:02 +02:00
Emmanuel Marty
6d96a8275d
Don't use bool in command-line tool (C99 only) 2020-10-13 16:08:35 +02:00
Emmanuel Marty
a35fc03a27
Merge pull request #49 from peterferrie/master
not-fast 65816 version
2020-09-11 08:38:57 +02:00
Peter Ferrie
2442d6e489 not-fast 65816 version 2020-09-10 13:12:17 -07:00
Emmanuel Marty
01228f3eeb
Bump version 2020-08-18 11:57:25 +02:00
Emmanuel Marty
e3fd315541
Faster LZSA1 compression 2020-08-18 11:51:24 +02:00
Emmanuel Marty
5a0da16874
Increase LZSA2 ratio for some input files 2020-08-18 09:13:54 +02:00
Emmanuel Marty
028007b57c
Bump version 2020-08-02 09:40:31 +02:00
Emmanuel Marty
4682b2e917
Small simplification 2020-07-29 15:23:22 +02:00
Emmanuel Marty
060f5d3350
Simplify code, compress LZSA2 another 15% faster 2020-07-29 13:01:24 +02:00
Emmanuel Marty
33eec56b9b
Bump version 2020-07-27 15:36:01 +02:00
Emmanuel Marty
90fa770458
Compress another 8% faster 2020-07-27 13:25:16 +02:00
Emmanuel Marty
b2971da2b4
Nicer code 2020-07-26 16:38:22 +02:00
Emmanuel Marty
3fb9dc54b1
Compress LZSA2 another 3% faster 2020-07-26 10:07:03 +02:00
Emmanuel Marty
00d1d95625
Small improvement 2020-07-24 19:18:46 +02:00
Emmanuel Marty
e4f013f2db
Compress LZSA2 17% faster 2020-07-24 17:14:01 +02:00
Emmanuel Marty
703ff19a3a
Bump version 2020-07-14 23:50:44 +02:00
Emmanuel Marty
fc5081fb1a
Rename confusing definitions 2020-07-14 22:36:38 +02:00
Emmanuel Marty
61698b5036
Another LZSA2 compression speedup 2020-07-14 17:01:07 +02:00
Emmanuel Marty
cf49af5cda
Faster LZSA2 compression 2020-07-14 12:36:56 +02:00
Emmanuel Marty
c39158eea8
Compress LZSA2 faster, clean code up 2020-07-13 19:34:07 +02:00
Emmanuel Marty
4864f3c184
Compress LZSA1 a little faster 2020-07-10 17:45:13 +02:00
Emmanuel Marty
8ed768aafc
Nicer and faster code 2020-07-10 08:55:45 +02:00
Emmanuel Marty
9c7495f458
Compress LZSA2 ~12% faster 2020-07-06 12:47:56 +02:00
Doug Masten
81f49fe430 Fix byte count for unlzsa2-6309.s in 6809 LZSA depackers 2020-07-03 22:17:20 -05:00
Doug Masten
eca750b485 Add enhanced Hitachi 6309 version to 6809 LZSA depackers 2020-07-03 22:09:11 -05:00
Doug Masten
ecb6bc5106 Tiny speed improvement to 6809 LZSA depackers
Same space used but improved match length code by 1 cycle for M6809. On H6309 the clock cycles are the same.
2020-07-03 01:29:02 -05:00
Doug Masten
31d5dc2419 Slight speed and space improvement to nibble processing in 6809 LZSA2 depackers 2020-06-30 14:05:59 -05:00
Doug Masten
214e03555f Slight speed improvement in 6809 LZSA depackers
Switch instruction "tfr x,u" to "leau ,x" which is 2 clock cycles faster on M6809. On H6309 both instructions have the same clock cycles.
2020-06-29 00:36:49 -05:00
Emmanuel Marty
afbb1de16c
Merge pull request #48 from dougmasten/dev
More optimizations to the 6809 LZSA depackers
2020-06-28 15:46:17 +02:00
Doug Masten
078edef880 Optimize match offset code in 6809 LZSA2 depacker 2020-06-27 04:17:05 -05:00
Doug Masten
03692fca2c Update code byte counts for 6809 LZSA depackers 2020-06-27 02:02:33 -05:00
Doug Masten
39e11422ec Delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
fde15d3fb0 Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
fc8120f0da Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
c7b3ffc067 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
137c6201be One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
e397428c1f Remove trailing whitespaces in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
b8cfbbbc7b Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
28ca829924 delay clearing high part of literals count until we really have to in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
27562f4761 Restructure code to eliminate one BRA instruction from loop in 6809 LZSA1 depacker 2020-06-27 02:02:33 -05:00
Doug Masten
0307d228a0
Merge pull request #3 from dougmasten/master
Merge pull request
2020-06-26 17:13:02 -05:00
Doug Masten
e8b2ebb89f
Merge pull request #2 from emmanuel-marty/master
Merge pull request
2020-06-26 17:08:44 -05:00
Doug Masten
f72133f4cf Move instruction before branch to save one byte in 6809 LZSA2 depacker 2020-06-26 16:48:58 -05:00
Doug Masten
56ba563794 One byte saving for setting reg A to $FF in 6809 LZSA2 depacker 2020-06-26 15:59:24 -05:00
Doug Masten
c0f09db364 Delay clearing high part of literals count until we really have to in 6809 LZSA2 depacker 2020-06-26 15:56:28 -05:00
Doug Masten
99db30a732 Optimize handling of 9 bits offset in 6809 LZSA2 depacker 2020-06-26 15:18:53 -05:00
Doug Masten
061ca99838 Optimize handling of token's Z flag bit in 6809 LZSA2 depacker 2020-06-26 14:52:09 -05:00
Doug Masten
7b96368469 Optimize match offset code in 6809 LZSA2 depacker 2020-06-26 14:42:36 -05:00
Doug Masten
e9540b2e3d Remove unnecessary "ADDB #$18" as register B will always have this value from 6809 LZSA2 depacker 2020-06-24 00:49:09 -05:00
Emmanuel Marty
40212975c2
Bump version 2020-06-22 10:08:40 +02:00
Emmanuel Marty
07c3969432
Compress LZSA2 raw files one byte shorter 2020-06-22 00:13:14 +02:00
Doug Masten
6a47ed7f41 Remove unnecessary "ADDB #$18" as B register will always have this value from 6809 LZSA depacker 2020-06-21 12:32:54 -05:00
Doug Masten
06d63de9d7 Rearrange "CLRA" instruction for slight speed optimization in 6809 LZSA depacker 2020-06-21 12:16:07 -05:00
Doug Masten
b5b8ca556a Rearrange match offset code to save 2 bytes in 6809 LZSA depacker 2020-06-21 01:09:15 -05:00
Emmanuel Marty
f724663ba8
Update README some more 2020-06-21 00:04:03 +02:00
Emmanuel Marty
798c07f6e0
Update README again 2020-06-20 23:59:01 +02:00
Emmanuel Marty
2f79779beb
Update README 2020-06-20 23:57:22 +02:00
Emmanuel Marty
a9b5a7511c
Merge pull request #47 from dougmasten/dom
Various optimizations to the 6809 LZSA depackers
2020-06-20 21:24:28 +02:00
Doug Masten
7f0316b81c Update code byte counts for 6809 LZSA depackers 2020-06-20 12:45:51 -05:00
Doug Masten
e8edc3242d Restructure code to eliminate one BRA instruction in unlzsa1.s 6809 LZSA depacker 2020-06-20 12:27:31 -05:00
Doug Masten
1dd65731c4 Remove unnecessary "ADDB #$12" as B register will always have this value from 6809 LZSA depackers 2020-06-20 12:27:22 -05:00
Doug Masten
f233d552ca Remove unnecessary "TSTB" instruction from 6809 LZSA depackers 2020-06-20 12:11:35 -05:00
Doug Masten
8c99570b06
Merge pull request #1 from emmanuel-marty/master
Add backward depackers for 6809
2020-06-20 12:03:30 -05:00
Emmanuel Marty
ee969968c1
Add backward depackers for 6809 2020-06-20 14:41:25 +02:00
Emmanuel Marty
f920485899
-3 bytes for 6809 LZSA2 depacker, slightly faster 2020-06-19 20:08:09 +02:00
Emmanuel Marty
2cdeda4784
-2 bytes for 6809 LZSA1 depacker, slightly faster 2020-06-19 20:07:41 +02:00
Emmanuel Marty
f198431a71
-2 bytes for 6809 LZSA2 depacker, speed unchanged 2020-06-19 18:26:25 +02:00
Emmanuel Marty
a6a69ebe17
Mention 6809 decompressors 2020-06-19 12:55:21 +02:00
Emmanuel Marty
f80706ff7e
-5 bytes and faster 6809 LZSA2 depacker 2020-06-19 12:50:10 +02:00
Emmanuel Marty
c3b111cea6
-2 bytes and faster 6809 LZSA1 depacker 2020-06-19 12:49:46 +02:00
Emmanuel Marty
e253d3e628
-1 byte for 6809 LZSA2 depacker 2020-06-19 08:57:01 +02:00
Emmanuel Marty
26bf944d9f
Add 6809 depacker for LZSA2 2020-06-18 17:48:08 +02:00
Emmanuel Marty
b88d3465e3
Compress LZSA2 a tiny bit faster 2020-06-18 17:47:31 +02:00
Emmanuel Marty
1d0427ecae
Make 6809 depacker for LZSA1 smaller/faster 2020-06-18 17:46:42 +02:00
Emmanuel Marty
3ce9a2b36e
Add 6809 depacker for LZSA1 2020-06-18 13:26:25 +02:00
Emmanuel Marty
930fe453eb
Bump version 2020-05-29 19:03:17 +02:00
Emmanuel Marty
193f9f467b
Fix C99-only feature 2020-05-28 20:00:30 +02:00
Emmanuel Marty
278fcc0256
Faster LZSA2 compression 2020-05-28 18:38:19 +02:00
Emmanuel Marty
231f6580c5
Another small LZSA2 compression speedup 2020-05-28 15:10:38 +02:00
Emmanuel Marty
dc413164ad
Compress a little faster 2020-05-24 15:19:59 +02:00
Emmanuel Marty
07104538b7
Fix #46 2020-05-06 19:36:39 +02:00
Emmanuel Marty
c173a5130f
Fix issue #45 and some extra warnings 2020-04-26 10:24:15 +02:00
Emmanuel Marty
d6c43f507c
Add reference to RomWBW that uses LZSA2 2020-04-21 23:39:05 +02:00
Emmanuel Marty
b0e2e7df75
Add reference to the Commander X16 2020-04-20 18:14:13 +02:00
Emmanuel Marty
b61ed6a229
Add reference to Gameboy depacker 2020-04-20 16:32:28 +02:00
Emmanuel Marty
7d59fe3325
Bump version 2020-04-11 15:38:43 +02:00
Emmanuel Marty
668204d953
Merge optimizations by Pavel Zagrebin
Manually merge PR #44
2020-04-04 13:29:25 +02:00
Emmanuel Marty
47e54ac110
Remove unused code 2020-03-24 20:18:22 +01:00
Emmanuel Marty
26a64de95e
Compress LZSA2 faster 2020-03-24 12:04:25 +01:00
Emmanuel Marty
f27ac9ae25
Remove code that is now unnecessary 2020-03-14 15:12:02 +01:00
Emmanuel Marty
236df36f2b
Another small compression speedup 2020-03-14 13:13:01 +01:00
Emmanuel Marty
78276c18da
Update README.md 2020-03-12 08:19:17 +01:00
Emmanuel Marty
0852f337bd
Add link to Motorola 68K decompressors 2020-03-06 18:54:09 +01:00
Emmanuel Marty
3efb9928a1
Merge pull request #42 from odzhan/master
compact decompressors for x86
2020-01-13 23:38:08 +01:00
odzhan
5fddaca0f8 compact decompressors for x86 2020-01-13 18:14:11 +00:00
Emmanuel Marty
693618f100
More asm fixes 2020-01-08 13:11:43 +01:00
Emmanuel Marty
aa122d1e05
Fix assembly when not using LZSA_SHORT_CP 2020-01-08 10:40:26 +01:00
Emmanuel Marty
315cda7b4f
Merge pull request #40 from peterferrie/master
cut one byte
2020-01-08 10:26:29 +01:00
Peter Ferrie
9f020b3605 fix typos in original code 2020-01-07 17:02:43 -08:00
Peter Ferrie
22e2f80ae7 cut one byte 2020-01-07 16:57:49 -08:00
Emmanuel Marty
925e435e53
Merge pull request #39 from peterferrie/master
fasterer v2
2020-01-07 19:49:34 +01:00
Peter Ferrie
45b91ddaa2 fasterer v2 2020-01-06 22:24:07 -08:00
Emmanuel Marty
fb7e03030f
Merge pull request #38 from specke/master
-2 bytes
2020-01-03 10:37:28 +01:00
Emmanuel Marty
29e8960e6f
Add faster LZSA1 6502 depacker by jbrandwood 2020-01-03 10:31:54 +01:00
Emmanuel Marty
410544f4e6
Fast 6502 LZSA2 depacker: smaller size, same speed 2020-01-03 10:01:23 +01:00
introspec
96b9933bd3
remove temporary label 2020-01-02 14:32:30 +00:00
introspec
9cd9fa5939
-2 bytes
(same speed)
2020-01-02 14:30:00 +00:00
introspec
3b37a0bb70
Merge pull request #5 from emmanuel-marty/master
Catch up with the changes in main
2020-01-02 13:51:29 +00:00
Emmanuel Marty
8721c11041
Add faster LZSA2 depacker by jbrandwood 2019-12-24 12:02:34 +01:00
Emmanuel Marty
a46796b6a9
Fix assembling of 6502 fast v1 depacker 2019-12-24 10:16:01 +01:00
Emmanuel Marty
64e641411e
Small compression speedup 2019-12-21 18:59:56 +01:00
Emmanuel Marty
7068c258bd
Bump version 2019-12-15 23:37:20 +01:00
Emmanuel Marty
a0ac24d105
Tiny LZSA1 ratio increase; small improvements 2019-12-15 23:36:51 +01:00
Emmanuel Marty
e9ca5032bc
Ratio increase 2019-12-15 18:04:16 +01:00
Emmanuel Marty
fc5f540a68
Bump version 2019-12-09 09:55:50 +01:00
Emmanuel Marty
4c566286f5
Increase ratio 2019-12-09 09:54:56 +01:00
Emmanuel Marty
65a262ec95
Bump version 2019-11-27 15:26:34 +01:00
Emmanuel Marty
63da0eb49d
Small speedup 2019-11-27 09:47:32 +01:00
Emmanuel Marty
78ad147799
Don't systematically encode last byte as literal 2019-11-26 20:48:13 +01:00
Emmanuel Marty
88f563d84c
Secure last token decompression for non-raw blocks 2019-11-26 17:35:11 +01:00
Emmanuel Marty
d37589cfdb
Expand forward repmatch candidates 2019-11-26 13:33:08 +01:00
Emmanuel Marty
0b5e915d83
Split non-rep from repmatch candidates 2019-11-26 11:58:34 +01:00
Emmanuel Marty
a38e8b126c
Small LZSA2 compression speedup 2019-11-20 15:40:11 +01:00
Emmanuel Marty
5f4cf4dfc7
Fix for LZSA1 as well 2019-11-19 19:53:46 +01:00
Emmanuel Marty
8e4e7c06c7
Fix calculation for promoting literal+match seqs 2019-11-19 19:48:39 +01:00
Emmanuel Marty
b1738b4003
Promote some literal+match sequences to a match 2019-11-18 12:10:23 +01:00
Emmanuel Marty
e328f63feb
Bump version 2019-11-13 00:57:31 +01:00
Emmanuel Marty
e0c42afac9
Increase LZSA2 ratio 2019-11-13 00:57:09 +01:00
Emmanuel Marty
ce7fc33646
Reduce memory use 2019-11-12 00:30:24 +01:00
Emmanuel Marty
53b2013b73
Small improvement to merging large matches 2019-11-11 18:41:08 +01:00
Emmanuel Marty
df9690a949
Merge pull request #36 from peterferrie/master
fast v1
2019-11-09 17:55:19 +01:00
Peter Ferrie
302234a91b fast v2 2019-10-31 23:01:00 -07:00
Peter Ferrie
9fc9a49d67 fast v1 2019-10-31 22:20:02 -07:00
Emmanuel Marty
7371486513
Bump version 2019-10-29 12:10:13 +01:00
Emmanuel Marty
f249597dfd
Increase LZSA2 ratio 2019-10-29 12:09:14 +01:00
Emmanuel Marty
78f588a833
Clarify defines 2019-10-29 10:45:57 +01:00
Emmanuel Marty
c790fb8ebe
Add link to Gabba ZX Spectrum demo, that uses LZSA 2019-10-28 09:21:57 +01:00
Emmanuel Marty
3c4f535e0b
Increase LZSA2 ratio by ~0.02% 2019-10-27 14:55:39 +01:00
Emmanuel Marty
8551c3ff8a
Merge pull request #35 from MobyGamer/decompressor/8086_speed_jumptable
Rewrite 8088 jumptable decompressor for maximum speed
2019-10-27 10:28:48 +01:00
mobygamer
30192238ea Rewrite 8088 jumptable decompressor for maximum speed
This is a rewrite of LZSA1JMP.ASM to use a 256-element jumptable, which
allows the code to handle all of the hot paths (common cases) without
any branching.  This not only reduces branches (which are very costly on
x86) to a bare minimum, but also grants us foreknowledge in a decode
path of what steps can be skipped.

The new code is 12.7% faster than the old code, and assembles to less
than 3K of object code and data.
2019-10-26 23:34:24 -05:00
Emmanuel Marty
53fcd3b1a8
Generalize merging very large matches 2019-10-24 13:05:32 +02:00
Emmanuel Marty
f4cf97f176
Merge pull request #34 from specke/master
Added option for unrolled copying of long matches
2019-10-22 21:52:48 +02:00
introspec
d5d788946e
Added an option for unrolling long match copying
Usually useless and costing +57 bytes, this option can bring dramatic performance improvements on very compressible data dominated by long matches
2019-10-22 20:11:46 +01:00
introspec
e1e1276c96
Merge pull request #4 from emmanuel-marty/master
Re-sync with the main
2019-10-22 20:09:00 +01:00
Emmanuel Marty
16ac8c75af
Add link to PDP-11 depackers by Ivan Gorodetsky 2019-10-22 17:13:05 +02:00
Emmanuel Marty
05d77095ca
Bump version 2019-10-22 12:39:27 +02:00
Emmanuel Marty
b84fe7c332
Further increase LZSA2 ratio by ~0.1% on average 2019-10-22 12:37:46 +02:00
Emmanuel Marty
7dd039a152
Delete shrink_context.h 2019-10-22 12:37:16 +02:00
Emmanuel Marty
9f6ca2c25f
Delete shrink_block_v2.c 2019-10-22 12:37:04 +02:00
Emmanuel Marty
dbaa3fa921
Further increase LZSA2 ratio by ~0.1% on average 2019-10-22 12:36:41 +02:00
Emmanuel Marty
2926ad8436
Remove unused #includes 2019-10-21 12:29:38 +02:00
Emmanuel Marty
d9156d3d2b
Reduce LZSA1 token count by 2.5% on average 2019-10-19 13:10:41 +02:00
Emmanuel Marty
6adf92fc88
Merge pull request #33 from specke/master
-1 byte
2019-10-11 10:18:05 +02:00
Emmanuel Marty
96df02c532
Remove unused code 2019-10-11 09:20:36 +02:00
Emmanuel Marty
89f1664ae6
Remove unused code 2019-10-11 09:14:19 +02:00
Emmanuel Marty
c363ecf527
Remove unused code 2019-10-11 09:11:49 +02:00
Emmanuel Marty
5141ed7c59
Remove unused code 2019-10-11 09:11:41 +02:00
Emmanuel Marty
c77c666568
Remove unused code 2019-10-11 09:10:07 +02:00
Emmanuel Marty
115a81cb71
Remove unused code 2019-10-11 09:09:42 +02:00
Emmanuel Marty
4436f216ce
Bump version 2019-10-11 09:06:50 +02:00
Emmanuel Marty
baa53f6889
Newly compressed LZSA2 files depack 0.7% faster 2019-10-11 09:05:58 +02:00
introspec
495a12216f
-1 byte
Very slightly faster too
2019-10-11 00:23:43 +01:00
Emmanuel Marty
b5117c3dfe
Fixes for -stats 2019-10-11 00:25:46 +02:00
Emmanuel Marty
f5ef6bf868
Merge pull request #32 from specke/master
Slightly faster unlzsa2_fast.asm for Z80
2019-10-11 00:22:12 +02:00
introspec
566e3a94e8
+0.2% speed
also, added an option to unroll LDIR for longer matches (which adds 38 bytes, but can be significantly faster for files with many long matches)
2019-10-10 22:50:23 +01:00
introspec
e3d7ec9c40
Merge pull request #3 from emmanuel-marty/master
Sync with E.Marty's branch
2019-10-10 22:46:53 +01:00
Emmanuel Marty
d209b73a30
Fix small bug 2019-10-10 14:42:08 +02:00
Emmanuel Marty
c1b18fb9fd
Implement -stats 2019-10-09 18:20:22 +02:00
Emmanuel Marty
6ce846ff24
Speed up LZSA2 compression 2019-10-09 16:07:29 +02:00
Emmanuel Marty
b09dadb1c1
Small LZSA2 token count reduction 2019-10-09 13:16:29 +02:00
Emmanuel Marty
03f841d04f
Speed up LZSA2 compression 2019-10-08 20:26:21 +02:00
Emmanuel Marty
44df8f3d2d
Add early-out, speed LZSA2 compression up further 2019-10-08 16:23:33 +02:00
Emmanuel Marty
bfb383befd
Speed up LZSA2 compression 2019-10-08 09:39:18 +02:00
Emmanuel Marty
39e2a90f81
Prevent small matchfinder inefficiency 2019-10-04 11:54:54 +02:00
Emmanuel Marty
33327201f7
Fix small LZSA2 token reduction inefficiency 2019-10-03 16:58:34 +02:00
Emmanuel Marty
29c6f3b2a3
Remove erroneous else statement 2019-09-26 19:13:09 +02:00
Emmanuel Marty
6a62f7d795
Update Z80 depackers changes history 2019-09-26 11:42:52 +02:00
Emmanuel Marty
681f78d1e8
Rename 2019-09-26 07:48:59 +02:00
Emmanuel Marty
8015ab8650
Rename 2019-09-26 07:48:44 +02:00
Emmanuel Marty
2f15298343
Rename 2019-09-26 07:48:33 +02:00
Emmanuel Marty
648a308d87
Rename 2019-09-26 07:48:19 +02:00
Emmanuel Marty
587a92f4ab
Rename Z80 depackers, add version history to LZSA1 2019-09-26 07:47:43 +02:00
Emmanuel Marty
7d9135c548
Update Z80 decompressors 2019-09-25 08:09:18 +02:00
Emmanuel Marty
ac9de3795c
Update Pareto frontier graph from spke 2019-09-25 07:56:47 +02:00
Emmanuel Marty
b4b4d39eff
Fix newly added external link 2019-09-24 18:03:20 +02:00
Emmanuel Marty
cb46987628
Update stats and links 2019-09-24 18:02:24 +02:00
Emmanuel Marty
e55c80a475
Clean up use of MODESWITCH_PENALTY; bump version 2019-09-24 14:43:17 +02:00
Emmanuel Marty
de0ff5d3b0
Reduce memory used for compression 2019-09-24 00:21:17 +02:00
Emmanuel Marty
249b8a4c46
Increase LZSA2 ratio and use forward parser for -m 2019-09-23 20:24:50 +02:00
Emmanuel Marty
74040890fc
Speed up LZSA2 compression (same binary output) 2019-09-23 16:58:03 +02:00
Emmanuel Marty
81e15d10f0
Add extra safety checks to LZSA2 token reducer 2019-09-22 20:41:09 +02:00
Emmanuel Marty
1869d85c1f
Simplify LZSA1 token reducer (same binary output) 2019-09-22 20:34:08 +02:00
Emmanuel Marty
1a4f662360
Bump version 2019-09-20 12:26:16 +02:00
Emmanuel Marty
c12e20b7fb
Improve LZSA2 compression ratio 2019-09-20 12:24:27 +02:00
Emmanuel Marty
51644ad2f9
Speed LZSA2 compression up further; fix typo 2019-09-19 17:18:37 +02:00
Emmanuel Marty
1495b27f69
Speed up LZSA1 compression with forward arrivals 2019-09-19 12:57:39 +02:00
Emmanuel Marty
c052a188f2
Reduce LZSA2 forward arrivals memory use 2019-09-19 11:46:03 +02:00
Emmanuel Marty
e4076e4090
Speed LZSA2 compression up; tiny ratio increase 2019-09-19 00:11:26 +02:00
Emmanuel Marty
8b7d0ab04d
Increase LZSA2 ratio. Decrease token count 2019-09-17 08:10:52 +02:00
Emmanuel Marty
b1da9c1aee
Add extra bound checks in C decompressors 2019-09-12 16:19:14 +02:00
Emmanuel Marty
b92a003338
Merge pull request #29 from francois-berder/master
Various improvements -- thank you!
2019-08-28 13:50:00 +02:00
Francois Berder
4f2d7da136 Fix main return value if compressing
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:41:54 +01:00
Francois Berder
a318ac2f83 Fix memory leak in comparestream_open
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:40:49 +01:00
Francois Berder
da67938978 Set dictionary to NULL in lzsa_dictionary_free
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:39:07 +01:00
Emmanuel Marty
2d213bcff1
Bump version number 2019-08-27 13:18:23 +02:00
Emmanuel Marty
9de7e930e9
Faster LZSA1 z80 decompression 2019-08-27 13:16:20 +02:00
Emmanuel Marty
ef259e6867
Implement forward arrivals optimal parsers 2019-08-27 00:51:34 +02:00
Emmanuel Marty
90b4da64d1
Merge pull request #27 from uniabis/twobytesshorter
2bytes shorter
2019-08-26 23:49:27 +02:00
uniabis
a807344343 2bytes shorter 2019-08-22 12:55:55 +09:00
Emmanuel Marty
27d0fe4e83
Merge pull request #26 from arm-in/patch-1
Update README.md
2019-08-06 20:54:24 +02:00
Armin Müller
f8e445a98a
Update README.md
Now 67 bytes with commit be30cae636
2019-08-06 20:15:59 +02:00
Emmanuel Marty
0e567bde47
Merge pull request #25 from specke/master
-1 byte
2019-08-06 20:03:52 +02:00
introspec
be30cae636
-1 byte
slightly slower, but this is the size-optimized branch
2019-08-06 12:36:27 +01:00
Emmanuel Marty
1b368e71ad
Fix comments, header single inclusion defines 2019-08-04 16:42:30 +02:00
Emmanuel Marty
d98220ff42
Merge pull request #24 from specke/master
New Pareto frontier graph
2019-08-01 16:51:29 +02:00
introspec
d412433df4
New Pareto frontier graph
Shows improved performance of the new Z80 decompressors, esp. due to the improvements by uniabis
2019-08-01 15:26:53 +01:00
Emmanuel Marty
77c1492310
Merge pull request #23 from specke/master
New faster and shorter decompressors
2019-08-01 16:19:24 +02:00
introspec
44bff39de3
New faster and shorter decompressors
This update is mostly about better integration of improvements by uniabis, with spke contributing several smaller size optimizations.
2019-08-01 15:07:14 +01:00
Emmanuel Marty
3c690b04f5
Merge pull request #22 from specke/master
incorporated improvements by uniabis
2019-08-01 01:34:46 +02:00
introspec
e7bb1faece
Merge branch 'master' into master 2019-07-31 23:24:30 +01:00
Emmanuel Marty
e48d2dafde
Merge pull request #21 from uniabis/hd64180 - up to 3% speedup on Z80!
hd64180 support on z80 unpacker
2019-07-31 23:57:23 +02:00
introspec
51ef92cdab
incorporated improvements by uniabis
also, slightly faster decompression for fast packer in backwards mode
2019-07-31 20:42:47 +01:00
uniabis
8d0528fddc hd64180 support
a bit faster, a bit smaller
2019-07-31 01:39:27 +09:00
Emmanuel Marty
b3aae36ecc
Bump version 2019-07-28 00:25:51 +02:00
Emmanuel Marty
8787b1c3d8
Merge pull request #20 from specke/master (should be already fixed now..)
fix a bug in the backward version of unlzsa2_fast_v1.asm
2019-07-27 15:50:38 +02:00
Emmanuel Marty
0a04796b19
Fix for z80 LZSA2 fast backward depacker 2019-07-27 15:39:44 +02:00
introspec
ac3bf78273
fix a bug in the backward version of unlzsa2_fast_v1.asm
an INC HL slipped through
2019-07-27 14:14:54 +01:00
Emmanuel Marty
82edcb8bb5
Fix literal runs that are multiple of 256 bytes 2019-07-27 01:35:46 +02:00
Emmanuel Marty
b613d01565
Test incompressible data with raw blocks 2019-07-26 13:30:41 +02:00
Emmanuel Marty
ae4cc12aed
Use ACME syntax 2019-07-26 12:31:26 +02:00
Emmanuel Marty
316dfdcdce
Fix comments, remove unused vars 2019-07-26 01:12:17 +02:00
Emmanuel Marty
fd70be918c
Merge pull request #19 from specke/master
Support for -b in Z80 decompressors
2019-07-24 20:09:48 +02:00
Emmanuel Marty
4835e4c26c
Support backward decompression 2019-07-24 20:08:23 +02:00
introspec
cca79e3e59
Delete unlzsa_small_v1.asm 2019-07-24 17:31:22 +01:00
introspec
607b26d337
Delete unlzsa_fast_v1.asm 2019-07-24 17:31:14 +01:00
introspec
fd61f403ad
LZSA1 decompressors with added support for -b. 2019-07-24 17:30:37 +01:00
introspec
fcfba056d2
Add files via upload
LZSA2 decompressors with support for -b option.
2019-07-24 17:28:39 +01:00
Emmanuel Marty
0c4dbf2b72
Add files via upload
Show decompression safety distance for raw blocks
2019-07-24 15:43:44 +02:00
Emmanuel Marty
9f313d6ee6
Handle EOD in C depacker; fix #18; fix typos in usage 2019-07-23 23:28:52 +02:00
Emmanuel Marty
04cc67cf42
Add reference to The Hollow 2019-07-16 20:41:36 +02:00
Emmanuel Marty
081a29a3db
Fix copying multiples of 256 bytes 2019-07-14 16:14:55 +02:00
66 changed files with 7101 additions and 2522 deletions

View File

@ -44,7 +44,7 @@ The match offset is decoded according to the XYZ bits in the token
XYZ
00Z 5-bit offset: read a nibble for offset bits 1-4 and use the inverted bit Z of the token as bit 0 of the offset. set bits 5-15 of the offset to 1.
01Z 9-bit offset: read a byte for offset bits 0-7 and use the inverted bit Z for bit 8 of the offset. set bits 9-15 of the offset to 1.
10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset, then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1.
10Z 13-bit offset: read a nibble for offset bits 9-12 and use the inverted bit Z for bit 8 of the offset, then read a byte for offset bits 0-7. set bits 13-15 of the offset to 1. subtract 512 from the offset to get the final value.
110 16-bit offset: read a byte for offset bits 8-15, then another byte for offset bits 0-7.
111 repeat offset: reuse the offset value of the previous match command.
@ -58,7 +58,7 @@ Note that the match offset is negative: it is added to the current decompressed
If the encoded match length is 7 or more, the 'M' bits in the token form the value 7, and an extra nibble is read:
* 0-14: the value is added to the 3 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
* 0-14: the value is added to the 7 stored in the token, and then the minmatch of 2 is added, to compose the final match length.
* 15: an extra byte follows
If an extra byte follows here, it can have two possible types of value:

View File

@ -1,8 +1,7 @@
CC=clang
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
CFLAGS=-O3 -g -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
OBJDIR=obj
LDFLAGS=
STRIP=strip
$(OBJDIR)/%.o: src/../%.c
@mkdir -p '$(@D)'
@ -18,7 +17,6 @@ OBJS += $(OBJDIR)/src/expand_context.o
OBJS += $(OBJDIR)/src/expand_inmem.o
OBJS += $(OBJDIR)/src/expand_streaming.o
OBJS += $(OBJDIR)/src/frame.o
OBJS += $(OBJDIR)/src/hashmap.o
OBJS += $(OBJDIR)/src/matchfinder.o
OBJS += $(OBJDIR)/src/shrink_block_v1.o
OBJS += $(OBJDIR)/src/shrink_block_v2.o
@ -34,9 +32,7 @@ OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
all: $(APP)
$(APP): $(OBJS)
@mkdir -p ../../bin/posix
$(CC) $^ $(LDFLAGS) -o $(APP)
$(STRIP) $(APP)
clean:
@rm -rf $(APP) $(OBJDIR)

View File

@ -3,6 +3,26 @@ LZSA is a collection of byte-aligned compression formats that are specifically e
![Pareto frontier](pareto_graph.png)
<sup>*ZX Spectrum</sup>
Check out [The Hollow](https://www.pouet.net/prod.php?which=81909) by Darklite and Offense, winner of the Solskogen 2019 wild compo, that uses LZSA on Z80.
[Gabba](https://www.pouet.net/prod.php?which=83539) by Stardust ranked 2nd in the ZX Spectrum demo compo at CAFe demoparty 2019 and also used LZSA on Z80.
[Myst Demake](http://www.deater.net/weave/vmwprod/mist/) for the Apple II by Vince Weaver, uses LZSA on 6502.
The 8 bit guy's [Commander X16 ROM](https://github.com/commanderx16/x16-rom) uses LZSA on 6502 as well.
[RomWBW](https://github.com/wwarthen/RomWBW) uses LZSA on Z80 for a variety of hobbyist computers.
The popular [rasm](https://github.com/EdouardBERGE/rasm) assembler for Z80 features LZSA-compressed data sections.
The [desolate](https://github.com/nzeemin/spectrum-desolate) game port to the ZX Spectrum uses LZSA compression on Z80.
[Marsmare: Alienation](https://zxonline.net/game/marsmare-alienation/), the winner of the recent [Yandex Retro Games Battle 2020](https://yandex.ru/museum/yrgb-2020-en), is using LZSA to compress its assets.
The [Lowtech demo](https://github.com/wiz21b/lowtech) for the Apple II+ and IIe, by Wiz/Imphobia, compresses data with LZSA.
The [Druid & Droid](https://leosoft.itch.io/druid-and-droid) game for the Amstrad CPC, also uses LZSA for compression.
The LZSA compression tool uses an aggressive optimal packing strategy to try to find the sequence of commands that gives the smallest packed file that decompresses to the original while maintaining the maximum possible decompression speed.
The compression formats give the user choices that range from decompressing faster than LZ4 on 8-bit systems with better compression, to compressing as well as ZX7 with much better decompression speed. LZSA1 is designed to replace LZ4 and LZSA2 to replace ZX7, in 8-bit scenarios.
@ -10,10 +30,11 @@ The compression formats give the user choices that range from decompressing fast
Compression ratio comparison between LZSA and other optimal packers, for a workload composed of ZX Spectrum and C64 files:
Bytes Ratio Decompression speed vs. LZ4
LZSA2 685610 53,18% <------ 75%
LZSA2 676681 52,49% <------ 75%
MegaLZ 4.89 679041 52,68% Not measured
ZX7 687133 53,30% 47,73%
LZ5 1.4.1 727107 56,40% 75%
LZSA1 736169 57,11% <------ 90%
LZSA1 735785 57,08% <------ 90%
Lizard -29 776122 60,21% Not measured
LZ4_HC -19 -B4 -BD 781049 60,59% 100%
Uncompressed 1289127 100% N/A
@ -21,13 +42,13 @@ Compression ratio comparison between LZSA and other optimal packers, for a workl
Performance over well-known compression corpus files:
Uncompressed LZ4_HC -19 -B4 -BD LZSA1 LZSA2
Canterbury 2810784 935827 (33,29%) 855044 (30,42%) 789075 (28,07%)
Silesia 211938580 77299725 (36,47%) 73707039 (34,78%) 69983184 (33,02%)
Calgary 3251493 1248780 (38,40%) 1196448 (36,80%) 1125462 (34,61%)
Large 11159482 3771025 (33,79%) 3648420 (32,69%) 3528725 (31,62%)
enwik9 1000000000 371841591 (37,18%) 355360717 (35,54%) 337063553 (33,71%)
Canterbury 2810784 935827 (33,29%) 850792 (30,27%) 770877 (27,43%)
Silesia 211938580 77299725 (36,47%) 73706340 (34,78%) 68928564 (32,52%)
Calgary 3251493 1248780 (38,40%) 1192123 (36,67%) 1110290 (34,15%)
Large 11159482 3771025 (33,79%) 3648393 (32,69%) 3519480 (31,54%)
enwik9 1000000000 371841591 (37,18%) 355360043 (35,54%) 334900611 (33,49%)
As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 69 bytes.
As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 67 bytes.
The compressor is approximately 2X slower than LZ4_HC but compresses better while maintaining similar decompression speeds and decompressor simplicity.
@ -39,6 +60,7 @@ The main differences between LZSA1 and the LZ4 compression format are:
As for LZSA2:
* 5-bit, 9-bit, 13-bit and 16-bit match offsets, using nibble encoding
* Rep-matches
* Shorter encoding of lengths, also using nibbles
* A minmatch of 2 bytes
* No (slow) bit-packing. LZSA2 uses byte alignment in the hot path, and nibbles.
@ -49,6 +71,8 @@ Inspirations:
* [LZ5/Lizard](https://github.com/inikep/lizard) by Przemyslaw Skibinski and Yann Collet.
* The suffix array intervals in [Wimlib](https://wimlib.net/git/?p=wimlib;a=tree) by Eric Biggers.
* ZX7 by Einar Saukas
* [apc](https://github.com/svendahl/cap) by Sven-Åke Dahl
* [Charles Bloom](http://cbloomrants.blogspot.com/)'s compression blog
License:
@ -57,9 +81,20 @@ License:
8-bit assembly code:
* Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke)
* Z80 decompressors (size- and speed-optimized) written by [introspec](https://github.com/specke) with optimizations by [uniabis](https://github.com/uniabis)
* 6502 and 8088 size-optimized improvements by [Peter Ferrie](https://github.com/peterferrie)
* 6502 speed-optimized decompressor by [John Brandwood](https://github.com/jbrandwood)
* 8088 speed-optimized decompressor by [Jim Leonard](https://github.com/mobygamer)
* 6809 decompressors (Tandy CoCo, Thomson MO/TO, Dragon 32/64..) optimized by [Doug Masten](https://github.com/dougmasten)
* Hitachi 6309 decompressors (Tandy CoCo 3) also contributed by [Doug Masten](https://github.com/dougmasten)
External links:
* [i8080 and PDP-11 decompressors](https://github.com/ivagorRetrocomp/DeLZSA) by Ivan Gorodetsky
* [MC68000 decompressors](https://github.com/tattlemuss/lz4-m68k/blob/master/src/lzsa.s) by Steven Tattersall
* [Gameboy decompressors](https://github.com/meltycode) by Meltycode, based on the Z80 code by introspec
* [Streamed LZSA2 depacker](https://hg.ulukai.org/ecm/inicomp/file/c1a1f9bd4382/lzsa2.asm) by C. Masloch
* LZSA's page on [Pouet](https://www.pouet.net/prod.php?which=81573)
# Compressed format

View File

@ -17,7 +17,7 @@ The 3-bytes LZSA header contains a signature and a traits byte:
Trait bits:
* V: 3 bit code that indicates which block data encoding is used. 0 is LZSA1 and 2 is LZSA2.
* V: 3 bit code that indicates which block data encoding is used. 0 is LZSA1 and 1 is LZSA2.
* Z: these bits in the traits are set to 0 for LZSA1 and LZSA2.
# Frame format

View File

@ -185,7 +185,6 @@
<ClInclude Include="..\src\format.h" />
<ClInclude Include="..\src\frame.h" />
<ClInclude Include="..\src\expand_inmem.h" />
<ClInclude Include="..\src\hashmap.h" />
<ClInclude Include="..\src\lib.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
@ -207,7 +206,6 @@
<ClCompile Include="..\src\expand_block_v2.c" />
<ClCompile Include="..\src\frame.c" />
<ClCompile Include="..\src\expand_inmem.c" />
<ClCompile Include="..\src\hashmap.c" />
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />

View File

@ -84,9 +84,6 @@
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_config.h">
<Filter>Fichiers sources\libdivsufsort\include</Filter>
</ClInclude>
<ClInclude Include="..\src\hashmap.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c">
@ -146,8 +143,5 @@
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort_utils.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile>
<ClCompile Include="..\src\hashmap.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -26,7 +26,6 @@
0CADC64722AAD8EB003E9821 /* expand_context.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC62F22AAD8EB003E9821 /* expand_context.c */; };
0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */; };
0CADC64A22AB8DAD003E9821 /* divsufsort_utils.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */; };
0CADC69622C8A420003E9821 /* hashmap.c in Sources */ = {isa = PBXBuildFile; fileRef = 0CADC69522C8A41F003E9821 /* hashmap.c */; };
/* End PBXBuildFile section */
/* Begin PBXCopyFilesBuildPhase section */
@ -81,8 +80,6 @@
0CADC63022AAD8EB003E9821 /* shrink_block_v2.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = shrink_block_v2.c; path = ../../src/shrink_block_v2.c; sourceTree = "<group>"; };
0CADC64922AB8DAD003E9821 /* divsufsort_utils.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; path = divsufsort_utils.c; sourceTree = "<group>"; };
0CADC64B22AB8DC3003E9821 /* divsufsort_config.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = divsufsort_config.h; sourceTree = "<group>"; };
0CADC69422C8A41F003E9821 /* hashmap.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = hashmap.h; path = ../../src/hashmap.h; sourceTree = "<group>"; };
0CADC69522C8A41F003E9821 /* hashmap.c */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.c; name = hashmap.c; path = ../../src/hashmap.c; sourceTree = "<group>"; };
/* End PBXFileReference section */
/* Begin PBXFrameworksBuildPhase section */
@ -130,8 +127,6 @@
0CADC62422AAD8EB003E9821 /* format.h */,
0CADC5F322AAD8EB003E9821 /* frame.c */,
0CADC62C22AAD8EB003E9821 /* frame.h */,
0CADC69522C8A41F003E9821 /* hashmap.c */,
0CADC69422C8A41F003E9821 /* hashmap.h */,
0CADC5F222AAD8EB003E9821 /* lib.h */,
0CADC5FC22AAD8EB003E9821 /* libdivsufsort */,
0CADC62222AAD8EB003E9821 /* lzsa.c */,
@ -240,7 +235,6 @@
isa = PBXSourcesBuildPhase;
buildActionMask = 2147483647;
files = (
0CADC69622C8A420003E9821 /* hashmap.c in Sources */,
0CADC64822AAD8EB003E9821 /* shrink_block_v2.c in Sources */,
0CADC63D22AAD8EB003E9821 /* sssort.c in Sources */,
0CADC64322AAD8EB003E9821 /* expand_block_v2.c in Sources */,

View File

@ -0,0 +1,305 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
DECOMPRESS_LZSA1_FAST
LDY #$00
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$70 ; LITERALS_RUN_LEN?
BNE PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS_DIRECT
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS_DIRECT ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
TXA
BCS PREPARE_COPY_LARGE_LITERALS ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
PREPARE_COPY_LITERALS
TAX
LDA SHIFT_TABLE-1,X ; shift literals length into place
; -1 because position 00 is reserved
PREPARE_COPY_LITERALS_DIRECT
TAX
PREPARE_COPY_LARGE_LITERALS
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
!ifdef BACKWARD_DECOMPRESS {
GETMATCH_ADJ_HI
DEC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
} else {
GETMATCH_ADJ_HI
INC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
}
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - substract match offset
STA OFFSHI ; store high 8 bits of offset
STX OFFSLO
SEC ; substract dest - match offset
LDA PUTDST+1
OFFSLO = *+1
SBC #$AA ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
SHIFT_TABLE
!BYTE $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
!BYTE $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
!BYTE $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02
!BYTE $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03
!BYTE $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04
!BYTE $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05
!BYTE $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06
!BYTE $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BEQ PUTDST_ADJ_HI
DEC PUTDST+1
RTS
PUTDST_ADJ_HI
DEC PUTDST+2
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BEQ GETSRC_ADJ_HI
DEC GETSRC+1
PLA
RTS
GETSRC_ADJ_HI
DEC GETSRC+2
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BEQ PUTDST_ADJ_HI
RTS
PUTDST_ADJ_HI
INC PUTDST+2
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BEQ GETSRC_ADJ_HI
RTS
GETSRC_ADJ_HI
INC GETSRC+2
RTS
}

View File

@ -0,0 +1,359 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
NIBCOUNT = $FC ; zero-page location for temp offset
DECOMPRESS_LZSA2_FAST
LDY #$00
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$18 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS_DIRECT ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
JMP PREPARE_COPY_LITERALS_DIRECT
PREPARE_COPY_LITERALS_LARGE
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
BCS PREPARE_COPY_LITERALS_HIGH ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter)
PREPARE_COPY_LITERALS
LSR ; shift literals count into place
LSR
LSR
PREPARE_COPY_LITERALS_DIRECT
TAX
BCS PREPARE_COPY_LITERALS_LARGE ; if so, literals count is large
PREPARE_COPY_LITERALS_HIGH
TXA
BEQ COPY_LITERALS
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
ROL ; carry: Z bit; A: xxxxxxx1 (carry known set from BCS OFFSET_9_BIT)
ADC #$00 ; if Z bit is set, add 1 to A (bit 0 of A is now 0), otherwise bit 0 is 1
ORA #$FE ; set offset bits 15-9 to 1. reversed Z is already in bit 0
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and substract 2 (to substract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - substract match offset
SEC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
SBC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
} else {
; Forward decompression - add match offset
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
ADC #$AA
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BEQ GETMATCH_ADJ_HI
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
!ifdef BACKWARD_DECOMPRESS {
GETMATCH_ADJ_HI
DEC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
} else {
GETMATCH_ADJ_HI
INC COPY_MATCH_LOOP+2
JMP GETMATCH_DONE
}
GETCOMBINEDBITS
EOR #$80
ASL
PHP
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE
NIBBLES = *+1
LDA #$AA
LSR NIBCOUNT
BCC NEED_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
NEED_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR
LSR
LSR
LSR
SEC
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
LDA PUTDST+1
BEQ PUTDST_ADJ_HI
DEC PUTDST+1
RTS
PUTDST_ADJ_HI
DEC PUTDST+2
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
PHA
LDA GETSRC+1
BEQ GETSRC_ADJ_HI
DEC GETSRC+1
PLA
RTS
GETSRC_ADJ_HI
DEC GETSRC+2
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BEQ PUTDST_ADJ_HI
RTS
PUTDST_ADJ_HI
INC PUTDST+2
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BEQ GETSRC_ADJ_HI
RTS
GETSRC_ADJ_HI
INC GETSRC+2
RTS
}

View File

@ -0,0 +1,282 @@
; ***************************************************************************
; ***************************************************************************
;
; lzsa1_6502.s
;
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA1 format.
;
; This code is written for the ACME assembler.
;
; The code is 165 bytes for the small version, and 191 bytes for the normal.
;
; Copyright John Brandwood 2021.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
;
; Decompression Options & Macros
;
;
; Choose size over decompression speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
; ***************************************************************************
; ***************************************************************************
;
; Data usage is last 7 bytes of zero-page.
;
lzsa_cmdbuf = $F9 ; 1 byte.
lzsa_winptr = $FA ; 1 word.
lzsa_srcptr = $FC ; 1 word.
lzsa_dstptr = $FE ; 1 word.
lzsa_offset = lzsa_winptr
LZSA_SRC_LO = $FC
LZSA_SRC_HI = $FD
LZSA_DST_LO = $FE
LZSA_DST_HI = $FF
; ***************************************************************************
; ***************************************************************************
;
; lzsa1_unpack - Decompress data stored in Emmanuel Marty's LZSA1 format.
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
;
DECOMPRESS_LZSA1_FAST:
lzsa1_unpack: ldy #0 ; Initialize source index.
ldx #0 ; Initialize hi-byte of length.
;
; Copy bytes from compressed source data.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.cp_length: !if LZSA_SMALL_SIZE {
jsr .get_byte
} else {
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .cp_skip0
inc <lzsa_srcptr + 1
}
.cp_skip0: sta <lzsa_cmdbuf ; Preserve this for later.
and #$70 ; Extract literal length.
lsr ; Set CC before ...
beq .lz_offset ; Skip directly to match?
lsr ; Get 3-bit literal length.
lsr
lsr
cmp #$07 ; Extended length?
bcc .cp_got_len
jsr .get_length ; X=0, CS from CMP, returns CC.
stx .cp_npages + 1 ; Hi-byte of length.
.cp_got_len: tax ; Lo-byte of length.
.cp_byte: lda (lzsa_srcptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; of this .cp_page loop.
inc <lzsa_srcptr + 0
bne .cp_skip1
inc <lzsa_srcptr + 1
.cp_skip1: inc <lzsa_dstptr + 0
bne .cp_skip2
inc <lzsa_dstptr + 1
.cp_skip2: dex
bne .cp_byte
.cp_npages: lda #0 ; Any full pages left to copy?
beq .lz_offset
dec .cp_npages + 1 ; Unlikely, so can be slow.
bcc .cp_byte ; Always true!
!if LZSA_SMALL_SIZE {
;
; Copy bytes from decompressed window.
;
; Shorter but slower version.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.lz_offset: jsr .get_byte ; Get offset-lo.
.offset_lo: adc <lzsa_dstptr + 0 ; Always CC from .cp_page loop.
sta <lzsa_winptr + 0
lda #$FF
bit <lzsa_cmdbuf
bpl .offset_hi
jsr .get_byte ; Get offset-hi.
.offset_hi: adc <lzsa_dstptr + 1 ; lzsa_winptr < lzsa_dstptr, so
sta <lzsa_winptr + 1 ; always leaves CS.
.lz_length: lda <lzsa_cmdbuf ; X=0 from previous loop.
and #$0F
adc #$03 - 1 ; CS from previous ADC.
cmp #$12 ; Extended length?
bcc .lz_got_len
jsr .get_length ; CS from CMP, X=0, returns CC.
stx .lz_npages + 1 ; Hi-byte of length.
.lz_got_len: tax ; Lo-byte of length.
.lz_byte: lda (lzsa_winptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; of this .lz_page loop.
inc <lzsa_winptr + 0
bne .lz_skip1
inc <lzsa_winptr + 1
.lz_skip1: inc <lzsa_dstptr + 0
bne .lz_skip2
inc <lzsa_dstptr + 1
.lz_skip2: dex
bne .lz_byte
.lz_npages: lda #0 ; Any full pages left to copy?
beq .cp_length
dec .lz_npages + 1 ; Unlikely, so can be slow.
bcc .lz_byte ; Always true!
} else {
;
; Copy bytes from decompressed window.
;
; Longer but faster.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.lz_offset: lda (lzsa_srcptr),y ; Get offset-lo.
inc <lzsa_srcptr + 0
bne .offset_lo
inc <lzsa_srcptr + 1
.offset_lo: sta <lzsa_offset + 0
lda #$FF ; Get offset-hi.
bit <lzsa_cmdbuf
bpl .offset_hi
lda (lzsa_srcptr),y
inc <lzsa_srcptr + 0
bne .offset_hi
inc <lzsa_srcptr + 1
.offset_hi: sta <lzsa_offset + 1
.lz_length: lda <lzsa_cmdbuf ; X=0 from previous loop.
and #$0F
adc #$03 ; Always CC from .cp_page loop.
cmp #$12 ; Extended length?
bcc .got_lz_len
jsr .get_length ; X=0, CS from CMP, returns CC.
.got_lz_len: inx ; Hi-byte of length+256.
eor #$FF ; Negate the lo-byte of length
tay
eor #$FF
.get_lz_dst: adc <lzsa_dstptr + 0 ; Calc address of partial page.
sta <lzsa_dstptr + 0 ; Always CC from previous CMP.
iny
bcs .get_lz_win
beq .get_lz_win ; Is lo-byte of length zero?
dec <lzsa_dstptr + 1
.get_lz_win: clc ; Calc address of match.
adc <lzsa_offset + 0 ; N.B. Offset is negative!
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_byte: lda (lzsa_winptr),y
sta (lzsa_dstptr),y
iny
bne .lz_byte
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_more
jmp .cp_length ; Loop around to the beginning.
.lz_more: inc <lzsa_winptr + 1 ; Unlikely, so can be slow.
bne .lz_byte ; Always true!
}
;
; Get 16-bit length in X:A register pair, return with CC.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.get_length: clc ; Add on the next byte to get
adc (lzsa_srcptr),y ; the length.
inc <lzsa_srcptr + 0
bne .skip_inc
inc <lzsa_srcptr + 1
.skip_inc: bcc .got_length ; No overflow means done.
clc ; MUST return CC!
tax ; Preserve overflow value.
.extra_byte: jsr .get_byte ; So rare, this can be slow!
pha
txa ; Overflow to 256 or 257?
beq .extra_word
.check_length: pla ; Length-lo.
bne .got_length ; Check for zero.
dex ; Do one less page loop if so.
.got_length: rts
.extra_word: jsr .get_byte ; So rare, this can be slow!
tax
bne .check_length ; Length-hi == 0 at EOF.
.finished: pla ; Length-lo.
pla ; Decompression completed, pop
pla ; return address.
rts
.get_byte: lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
bne .got_byte
inc <lzsa_srcptr + 1 ; Inc & test for bank overflow.
.got_byte: rts

View File

@ -0,0 +1,308 @@
; ***************************************************************************
; ***************************************************************************
;
; lzsa2_6502.s
;
; NMOS 6502 decompressor for data stored in Emmanuel Marty's LZSA2 format.
;
; This code is written for the ACME assembler.
;
; The code is 241 bytes for the small version, and 256 bytes for the normal.
;
; Copyright John Brandwood 2021.
;
; Distributed under the Boost Software License, Version 1.0.
; (See accompanying file LICENSE_1_0.txt or copy at
; http://www.boost.org/LICENSE_1_0.txt)
;
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
; ***************************************************************************
;
; Decompression Options & Macros
;
;
; Choose size over decompression speed (within sane limits)?
;
LZSA_SMALL_SIZE = 0
; ***************************************************************************
; ***************************************************************************
;
; Data usage is last 11 bytes of zero-page.
;
; Fix: "lzsa_length = lzsa_winptr" was defined twice (once before
; lzsa_winptr itself was defined); duplicate "=" definitions are an
; error in ACME, so only the single definition after lzsa_winptr is kept.
;
lzsa_cmdbuf = $F5 ; 1 byte.
lzsa_nibflg = $F6 ; 1 byte.
lzsa_nibble = $F7 ; 1 byte.
lzsa_offset = $F8 ; 1 word.
lzsa_winptr = $FA ; 1 word.
lzsa_srcptr = $FC ; 1 word.
lzsa_dstptr = $FE ; 1 word.
lzsa_length = lzsa_winptr ; 1 word (alias; NOTE(review): presumably
 ; length and winptr are never live at the
 ; same time -- confirm before reusing).
LZSA_SRC_LO = $FC ; Byte-wise aliases of the src/dst
LZSA_SRC_HI = $FD ; pointers above, for callers.
LZSA_DST_LO = $FE
LZSA_DST_HI = $FF
; ***************************************************************************
; ***************************************************************************
;
; lzsa2_unpack - Decompress data stored in Emmanuel Marty's LZSA2 format.
;
; Args: lzsa_srcptr = ptr to compressed data
; Args: lzsa_dstptr = ptr to output buffer
;
DECOMPRESS_LZSA2_FAST:
lzsa2_unpack: ldx #$00 ; Hi-byte of length or offset.
ldy #$00 ; Initialize source index.
sty <lzsa_nibflg ; Initialize nibble buffer.
;
; Copy bytes from compressed source data.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
.cp_length: !if LZSA_SMALL_SIZE {
jsr .get_byte
} else {
lda (lzsa_srcptr),y ; Read token byte, inc src ptr.
inc <lzsa_srcptr + 0
bne .cp_skip0
inc <lzsa_srcptr + 1
}
.cp_skip0: sta <lzsa_cmdbuf ; Preserve this for later.
and #$18 ; Extract literal length.
beq .lz_offset ; Skip directly to match?
lsr ; Get 2-bit literal length.
lsr
lsr
cmp #$03 ; Extended length?
bcc .cp_got_len
jsr .get_length ; X=0 for literals, returns CC.
stx .cp_npages + 1 ; Hi-byte of length (self-mod).
.cp_got_len: tax ; Lo-byte of length.
.cp_byte: lda (lzsa_srcptr),y ; CC throughout the execution of
sta (lzsa_dstptr),y ; of this .cp_page loop.
inc <lzsa_srcptr + 0
bne .cp_skip1
inc <lzsa_srcptr + 1
.cp_skip1: inc <lzsa_dstptr + 0
bne .cp_skip2
inc <lzsa_dstptr + 1
.cp_skip2: dex
bne .cp_byte
.cp_npages: lda #0 ; Any full pages left to copy?
beq .lz_offset
dec .cp_npages + 1 ; Unlikely, so can be slow.
bcc .cp_byte ; Always true!
;
; Copy bytes from decompressed window.
;
; N.B. X=0 is expected and guaranteed when we get here.
;
; xyz
; ===========================
; 00z 5-bit offset
; 01z 9-bit offset
; 10z 13-bit offset
; 110 16-bit offset
; 111 repeat offset
;
.lz_offset: lda <lzsa_cmdbuf ; Reload token to decode offset.
asl ; Shift x (mode) bit into C.
bcs .get_13_16_rep
.get_5_9_bits: dex ; X=$FF for a 5-bit offset.
asl ; Shift y (mode) bit into C.
bcs .get_9_bits ; Fall through if 5-bit.
.get_13_bits: asl ; Both 5-bit and 13-bit read
php ; a nibble.
jsr .get_nibble
plp
rol ; Shift into position, clr C.
eor #$E1
cpx #$00 ; X=$FF for a 5-bit offset.
bne .set_offset
sbc #2 ; 13-bit offset from $FE00.
bne .set_hi_8 ; Always NZ from previous SBC.
.get_9_bits: asl ; X=$FF if CC, X=$FE if CS.
bcc .get_lo_8
dex
bcs .get_lo_8 ; Always CS from previous BCC.
.get_13_16_rep: asl
bcc .get_13_bits ; Shares code with 5-bit path.
.get_16_rep: bmi .lz_length ; Repeat previous offset.
.get_16_bits: jsr .get_byte ; Get hi-byte of offset.
.set_hi_8: tax
.get_lo_8: !if LZSA_SMALL_SIZE {
jsr .get_byte ; Get lo-byte of offset.
} else {
lda (lzsa_srcptr),y ; Get lo-byte of offset.
inc <lzsa_srcptr + 0
bne .set_offset
inc <lzsa_srcptr + 1
}
.set_offset: sta <lzsa_offset + 0 ; Save new offset.
stx <lzsa_offset + 1
.lz_length: ldx #1 ; Hi-byte of length+256.
lda <lzsa_cmdbuf ; Extract match length (MMM).
and #$07
clc
adc #$02 ; Add minimum match length.
cmp #$09 ; Extended length?
bcc .got_lz_len
jsr .get_length ; X=1 for match, returns CC.
inx ; Hi-byte of length+256.
.got_lz_len: eor #$FF ; Negate the lo-byte of length.
tay
eor #$FF
.get_lz_dst: adc <lzsa_dstptr + 0 ; Calc address of partial page.
sta <lzsa_dstptr + 0 ; Always CC from previous CMP.
iny
bcs .get_lz_win
beq .get_lz_win ; Is lo-byte of length zero?
dec <lzsa_dstptr + 1
.get_lz_win: clc ; Calc address of match.
adc <lzsa_offset + 0 ; N.B. Offset is negative!
sta <lzsa_winptr + 0
lda <lzsa_dstptr + 1
adc <lzsa_offset + 1
sta <lzsa_winptr + 1
.lz_byte: lda (lzsa_winptr),y ; Copy match bytes until Y
sta (lzsa_dstptr),y ; wraps around to zero.
iny
bne .lz_byte
inc <lzsa_dstptr + 1
dex ; Any full pages left to copy?
bne .lz_more
jmp .cp_length ; Loop around to the beginning.
.lz_more: inc <lzsa_winptr + 1 ; Unlikely, so can be slow.
bne .lz_byte ; Always true!
;
; Lookup tables to differentiate literal and match lengths.
;
.nibl_len_tbl: !byte 3 ; 0+3 (for literal).
!byte 9 ; 2+7 (for match).
.byte_len_tbl: !byte 18 - 1 ; 0+3+15 - CS (for literal).
!byte 24 - 1 ; 2+7+15 - CS (for match).
;
; Get 16-bit length in X:A register pair, return with CC.
;
; N.B. X is the table index on entry: 0 for literal, 1 for match.
;
.get_length: jsr .get_nibble
cmp #$0F ; Extended length?
bcs .byte_length
adc .nibl_len_tbl,x ; Always CC from previous CMP.
.got_length: ldx #$00 ; Set hi-byte of 4 & 8 bit
rts ; lengths.
.byte_length: jsr .get_byte ; So rare, this can be slow!
adc .byte_len_tbl,x ; Always CS from previous CMP.
bcc .got_length
beq .finished
.word_length: clc ; MUST return CC!
jsr .get_byte ; So rare, this can be slow!
pha ; Save length-lo.
jsr .get_byte ; So rare, this can be slow!
tax ; Length-hi.
pla ; Length-lo.
bne .got_word ; Check for zero lo-byte.
dex ; Do one less page loop if so.
.got_word: rts
.get_byte: lda (lzsa_srcptr),y ; Subroutine version for when
inc <lzsa_srcptr + 0 ; inlining isn't advantageous.
bne .got_byte
inc <lzsa_srcptr + 1
.got_byte: rts
.finished: pla ; Decompression completed, pop
pla ; return address.
rts
;
; Get a nibble value from compressed data in A.
;
.get_nibble: lsr <lzsa_nibflg ; Is there a nibble waiting?
lda <lzsa_nibble ; Extract the lo-nibble.
bcs .got_nibble
inc <lzsa_nibflg ; Reset the flag.
!if LZSA_SMALL_SIZE {
jsr .get_byte
} else {
lda (lzsa_srcptr),y ; Read 2 nibbles, inc src ptr.
inc <lzsa_srcptr + 0
bne .set_nibble
inc <lzsa_srcptr + 1
}
.set_nibble: sta <lzsa_nibble ; Preserve for next time.
lsr ; Extract the hi-nibble.
lsr
lsr
lsr
.got_nibble: and #$0F
rts

View File

@ -1,170 +1,270 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
; NOTE: self-modifying code -- the $AAAA operands of LDA/STA and the
; OFFSHI immediate are patched at run time via the *+1/*+2 equates.
DECOMPRESS_LZSA1
LDY #$00
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
LSR A ; shift literals count into place
LSR A
LSR A
LSR A
CMP #$07 ; LITERALS_RUN_LEN?
BCC PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS ; (*like JMP PREPARE_COPY_LITERALS but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
BYTE $A9 ; mask TAX (faster than BCS)
; ($A9 = LDA # opcode: swallows the next
; TAX opcode byte as its operand)
PREPARE_COPY_LITERALS
TAX ; count lo-byte in X
INY ; count hi-byte + 1 in Y for DEY loop
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$0FF ; high 8 bits ($0FF == $FF)
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
INC COPY_MATCH_LOOP+1 ; step the (patched) source address
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
JSR PUTDST ; copy to destination
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
; NOTE: self-modifying code -- the $AAAA operands of LDA/STA and the
; OFFSLO/OFFSHI immediates are patched at run time via the *+1/*+2 equates.
DECOMPRESS_LZSA1
LDY #$00
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
LSR ; shift literals count into place
LSR
LSR
LSR
CMP #$07 ; LITERALS_RUN_LEN?
BCC PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS ; (*like JMP PREPARE_COPY_LITERALS but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
TXA
PREPARE_COPY_LITERALS
TAX ; count lo-byte in X
BEQ COPY_LITERALS ; lo == 0: first DEX wraps to $FF,
; so skip the extra outer iteration
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP ; lo == 0: skip the extra outer iteration
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
DEC COPY_MATCH_LOOP+2
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
STA OFFSHI ; store high 8 bits of offset
STX OFFSLO
SEC ; subtract dest - match offset
LDA PUTDST+1
OFFSLO = *+1
SBC #$AA ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC ; NOTE(review): presumably forces C=1 for the
; ADC #$02 below, mirroring the forward path
} else {
; Forward decompression - add match offset
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
LDA PUTDST+1
BNE PUTDST_DONE
DEC PUTDST+2
PUTDST_DONE
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
PHA ; preserve data byte while the
LDA GETSRC+1 ; pointer is decremented
BNE GETSRC_DONE
DEC GETSRC+2
GETSRC_DONE
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
}

View File

@ -1,239 +1,332 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
NIBCOUNT = $FC ; zero-page location for temp offset
; NOTE: self-modifying code -- the $AAAA operands and the OFFSLO/OFFSHI/
; NIBBLES immediates are patched at run time via the *+1/*+2 equates.
DECOMPRESS_LZSA2
LDY #$00
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
LSR A ; shift literals count into place
LSR A
LSR A
CMP #$03 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
PREPARE_COPY_LITERALS
TAX
BCC PREPARE_COPY_LITERALS_HIGH ; if not, literals count is complete
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_LITERALS_HIGH
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$0FF ; set offset bits 15-8 to 1 ($0FF == $FF)
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
;;ASL ; shift Z (offset bit 8) in place
ROL
ROL
AND #$01
EOR #$FF ; set offset bits 15-9 to 1
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
;;ASL ; XYZ=111?
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
ADC #$AA ; (patched: offset lo-byte)
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
INC COPY_MATCH_LOOP+1 ; step the (patched) source address
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
JSR PUTDST ; copy to destination
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
GETCOMBINEDBITS
EOR #$80
ASL
PHP ; save Z bit for after the nibble read
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
COMBINEDBITZ
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE
NIBBLES = *+1
LDA #$AA ; (patched: buffered nibble pair)
LSR NIBCOUNT
BCS HAS_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR
LSR
LSR
LSR
SEC
HAS_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
NIBCOUNT = $FC ; zero-page location for temp offset
; NOTE: self-modifying code -- the $AAAA operands and the OFFSLO/OFFSHI/
; NIBBLES immediates are patched at run time via the *+1/*+2 equates.
DECOMPRESS_LZSA2
LDY #$00
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
LSR ; shift literals count into place
LSR
LSR
CMP #$03 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
PREPARE_COPY_LITERALS
TAX
BCC PREPARE_COPY_LITERALS_HIGH ; if not, literals count is complete
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_LITERALS_HIGH
TXA
BEQ COPY_LITERALS ; lo == 0: first DEX wraps to $FF,
; so skip the extra outer iteration
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
ROL ; carry: Z bit; A: xxxxxxx1 (carry known set from BCS OFFSET_9_BIT)
ADC #$00 ; if Z bit is set, add 1 to A (bit 0 of A is now 0), otherwise bit 0 is 1
ORA #$FE ; set offset bits 15-9 to 1. reversed Z is already in bit 0
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
SEC ; subtract dest - match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
SBC #$AA ; (patched: offset lo-byte)
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA PUTDST+2
OFFSHI = *+1
SBC #$AA ; high 8 bits
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC ; NOTE(review): presumably forces C=1 for the
; ADC #$01 below, mirroring the forward path
} else {
; Forward decompression - add match offset
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
OFFSLO = *+1
ADC #$AA ; (patched: offset lo-byte)
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP ; lo == 0: skip the extra outer iteration
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
LDA COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
DEC COPY_MATCH_LOOP+2
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
}
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
GETCOMBINEDBITS
EOR #$80
ASL
PHP ; save Z bit for after the nibble read
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE
NIBBLES = *+1
LDA #$AA ; (patched: buffered nibble pair)
LSR NIBCOUNT
BCS HAS_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR
LSR
LSR
LSR
SEC
HAS_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
LDA PUTDST+1
BNE PUTDST_DONE
DEC PUTDST+2
PUTDST_DONE
DEC PUTDST+1
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
PHA ; preserve data byte while the
LDA GETSRC+1 ; pointer is decremented
BNE GETSRC_DONE
DEC GETSRC+2
GETSRC_DONE
DEC GETSRC+1
PLA
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA ; write byte (operand patched above)
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA ; read byte (operand patched above)
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS
}

281
asm/65816/decompress_v1.asm Normal file
View File

@ -0,0 +1,281 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI/LZSA_SRC_BANK contain the compressed raw block address
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the destination buffer address
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI/LZSA_SRC_BANK must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019-2020 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
!cpu 65816
DECOMPRESS_LZSA1 ; entry point: decompress one raw LZSA1 block
SEP #$30 ; 8-bit accumulator and index registers
!as
!rs
LDY #$00 ; clear high byte of counts
DECODE_TOKEN
JSR GETSRC ; read token byte: O|LLL|MMMM
PHA ; preserve token on stack
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$70 ; LITERALS_RUN_LEN?
BNE PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$F9 ; (LITERALS_RUN_LEN)
BCC PREPARE_COPY_LITERALS_DIRECT
BEQ LARGE_VARLEN_LITERALS ; if adding up to zero, go grab 16-bit count
JSR GETSRC ; get single extended byte of variable literals count
INY ; add 256 to literals count
BCS PREPARE_COPY_LITERALS_DIRECT ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
TXA
BCS PREPARE_COPY_LARGE_LITERALS ; (*like JMP PREPARE_COPY_LITERALS_DIRECT but shorter)
PREPARE_COPY_LITERALS
TAX
LDA SHIFT_TABLE-1,X ; shift literals length into place
; -1 because position 00 is reserved
PREPARE_COPY_LITERALS_DIRECT
TAX
PREPARE_COPY_LARGE_LITERALS
BEQ COPY_LITERALS ; if low byte is zero, Y already holds the page count
INY ; otherwise bias page count for the partial page
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI GET_LONG_OFFSET ; $80: 16 bit offset
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
SHORT_VARLEN_MATCHLEN
JSR GETSRC ; get single extended byte of variable match len
INY ; add 256 to match length
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP ; if low byte is zero, Y already holds the page count
INY ; otherwise bias page count for the partial page
COPY_MATCH_LOOP
LDA $AAAAAA ; get one byte of backreference (operand is patched with the match address)
JSR PUTDST ; copy to destination
REP #$20 ; 16-bit accumulator for pointer arithmetic
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
DEC COPY_MATCH_LOOP+1 ; self-modify: step the 16-bit read address down
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1 ; self-modify: step the 16-bit read address up
}
SEP #$20 ; back to 8-bit accumulator
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
BEQ DECODE_TOKEN ; (*like JMP DECODE_TOKEN but shorter)
GET_LONG_OFFSET ; handle 16 bit offset:
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
STA OFFSHI ; store high 8 bits of offset
STX OFFSLO
SEC ; subtract dest - match offset
REP #$20 ; 16-bit accumulator
!al
LDA PUTDST+1
OFFSLO = *+1
OFFSHI = *+2
SBC #$AAAA ; 16 bits (operand patched with the offset just stored)
STA COPY_MATCH_LOOP+1 ; store back reference address
SEP #$20 ; back to 8-bit accumulator
!as
SEC
} else {
; Forward decompression - add match offset
STA OFFSHI ; store high 8 bits of offset
TXA
CLC ; add dest + match offset
ADC PUTDST+1 ; low 8 bits
STA COPY_MATCH_LOOP+1 ; store back reference address
OFFSHI = *+1
LDA #$AA ; high 8 bits (operand patched with the offset's high byte)
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
}
LDA PUTDST+3 ; bank
STA COPY_MATCH_LOOP+3 ; store back reference address
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
ADC #$02 ; plus carry which is always set by the high ADC
CMP #$12 ; MATCH_RUN_LEN?
BCC PREPARE_COPY_MATCH ; if not, count is directly embedded in token
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; add MATCH_RUN_LEN and MIN_MATCH_SIZE to match length
BCC PREPARE_COPY_MATCH
BNE SHORT_VARLEN_MATCHLEN
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BNE PREPARE_COPY_MATCH_Y ; if not, continue
DECOMPRESSION_DONE
RTS
SHIFT_TABLE ; lookup table: maps (token AND $70)-1 to literals count, replacing 4 shifts
!BYTE $00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00,$00
!BYTE $01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01,$01
!BYTE $02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02,$02
!BYTE $03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03,$03
!BYTE $04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04,$04
!BYTE $05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05,$05
!BYTE $06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06,$06
!BYTE $07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07,$07
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT ; read one byte from the source, then store it at the destination
JSR GETSRC
PUTDST ; store A at the (self-modified) 24-bit destination address, then step it
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
LZSA_DST_BANK = *+3
STA $AAAAAA ; operand is patched: holds the current destination pointer
REP #$20 ; 16-bit accumulator
DEC PUTDST+1 ; decrement the 16-bit destination pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
GETLARGESRC ; read a 16-bit little-endian value: low byte in X, high byte in A
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC ; read one byte from the (self-modified) 24-bit source address into A
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LZSA_SRC_BANK = *+3
LDA $AAAAAA ; operand is patched: holds the current source pointer
REP #$20 ; 16-bit accumulator
DEC GETSRC+1 ; decrement the 16-bit source pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT ; read one byte from the source, then store it at the destination
JSR GETSRC
PUTDST ; store A at the (self-modified) 24-bit destination address, then step it
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
LZSA_DST_BANK = *+3
STA $AAAAAA ; operand is patched: holds the current destination pointer
REP #$20 ; 16-bit accumulator
INC PUTDST+1 ; increment the 16-bit destination pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
GETLARGESRC ; read a 16-bit little-endian value: low byte in X, high byte in A
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC ; read one byte from the (self-modified) 24-bit source address into A
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LZSA_SRC_BANK = *+3
LDA $AAAAAA ; operand is patched: holds the current source pointer
REP #$20 ; 16-bit accumulator
INC GETSRC+1 ; increment the 16-bit source pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
}

338
asm/65816/decompress_v2.asm Normal file
View File

@ -0,0 +1,338 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI/LZSA_SRC_BANK contain the compressed raw block address
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the destination buffer address
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the last decompressed byte address, +1
;
; -----------------------------------------------------------------------------
; Backward decompression is also supported, use lzsa -r -b -f2 <original_file> <compressed_file>
; To use it, also define BACKWARD_DECOMPRESS=1 before including this code!
;
; in:
; * LZSA_SRC_LO/LZSA_SRC_HI/LZSA_SRC_BANK must contain the address of the last byte of compressed data
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK must contain the address of the last byte of the destination buffer
;
; out:
; * LZSA_DST_LO/LZSA_DST_HI/LZSA_DST_BANK contain the last decompressed byte address, -1
;
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019-2020 Emmanuel Marty, Peter Ferrie
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
!cpu 65816
NIBCOUNT = $FC ; zero-page location for temp offset
DECOMPRESS_LZSA2 ; entry point: decompress one raw LZSA2 block
SEP #$30 ; 8-bit accumulator and index registers
!as
!rs
LDY #$00
STY NIBCOUNT ; no nibble buffered yet
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$18 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
; add nibble to len from token
ADC #$02 ; (LITERALS_RUN_LEN_V2) minus carry
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BCC PREPARE_COPY_LITERALS_DIRECT ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$EE ; overflow?
BRA PREPARE_COPY_LITERALS_DIRECT
PREPARE_COPY_LITERALS_LARGE
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
BCS PREPARE_COPY_LITERALS_HIGH ; (*same as JMP PREPARE_COPY_LITERALS_HIGH but shorter)
PREPARE_COPY_LITERALS
LSR ; shift literals count into place
LSR
LSR
PREPARE_COPY_LITERALS_DIRECT
TAX
BCS PREPARE_COPY_LITERALS_LARGE ; if so, literals count is large
PREPARE_COPY_LITERALS_HIGH
TXA
BEQ COPY_LITERALS ; if low byte is zero, Y already holds the page count
INY ; otherwise bias page count for the partial page
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
ASL ; shift token's X bit into the carry
BCS REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BCS OFFSET_9_BIT
; 00Z: 5 bit offset
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
;;ASL ; shift Z (offset bit 8) in place
ROL
ROL
AND #$01
EOR #$FF ; set offset bits 15-9 to 1
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BCS REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
;;ASL ; XYZ=111?
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETSRC ; grab high 8 bits
GOT_OFFSET_HI
TAX
JSR GETSRC ; grab low 8 bits
GOT_OFFSET_LO
STA OFFSLO ; store low byte of match offset
STX OFFSHI ; store high byte of match offset
REP_MATCH ; offset untouched here: rep-match reuses the stored OFFSLO/OFFSHI
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - subtract match offset
SEC ; subtract dest - match offset
REP #$20 ; 16-bit accumulator
!al
LDA PUTDST+1 ; 16 bits
OFFSLO = *+1
OFFSHI = *+2
SBC #$AAAA ; operand patched with the stored offset
STA COPY_MATCH_LOOP+1 ; store back reference address
SEP #$20 ; back to 8-bit accumulator
!as
SEC
} else {
; Forward decompression - add match offset
CLC ; add dest + match offset
REP #$20 ; 16-bit accumulator
!al
LDA PUTDST+1 ; 16 bits
OFFSLO = *+1
OFFSHI = *+2
ADC #$AAAA ; operand patched with the stored offset
STA COPY_MATCH_LOOP+1 ; store back reference address
SEP #$20 ; back to 8-bit accumulator
!as
}
LDA PUTDST+3 ; bank
STA COPY_MATCH_LOOP+3 ; store back reference address
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
ADC #$01 ; add MIN_MATCH_SIZE_V2 and carry
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BCC PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
; add nibble to len from token
ADC #$08 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) minus carry
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BCC PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
; the carry is always set by the CMP above
; GETSRC doesn't change it
SBC #$E8 ; overflow?
PREPARE_COPY_MATCH
TAX
BCC PREPARE_COPY_MATCH_Y ; if not, the match length is complete
BEQ DECOMPRESSION_DONE ; if EOD code, bail
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
PREPARE_COPY_MATCH_Y
TXA
BEQ COPY_MATCH_LOOP ; if low byte is zero, Y already holds the page count
INY ; otherwise bias page count for the partial page
COPY_MATCH_LOOP
LDA $AAAAAA ; get one byte of backreference (operand is patched with the match address)
JSR PUTDST ; copy to destination
REP #$20 ; 16-bit accumulator for pointer arithmetic
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
DEC COPY_MATCH_LOOP+1 ; self-modify: step the 16-bit read address down
} else {
; Forward decompression -- put backreference bytes forward
INC COPY_MATCH_LOOP+1 ; self-modify: step the 16-bit read address up
}
SEP #$20 ; back to 8-bit accumulator
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN ; process next token
GETCOMBINEDBITS ; combine the token's Z bit (bit 7 of A) with a nibble into offset bits 0-4
EOR #$80 ; invert the Z bit
ASL ; shift it out; the Z flag now reflects !Z
PHP ; save that flag state across the nibble read
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
PLP ; merge Z bit as the carry bit (for offset bit 0)
COMBINEDBITZ
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
DECOMPRESSION_DONE
RTS
GETNIBBLE ; return the next 4-bit nibble from the stream in A (low 4 bits)
NIBBLES = *+1
LDA #$AA ; self-modified: operand holds the buffered nibble pair
LSR NIBCOUNT ; nibble already buffered? (flag kept in bit 0 of NIBCOUNT)
BCC NEED_NIBBLES
AND #$0F ; isolate low 4 bits of nibble (carry is set on this path)
RTS
NEED_NIBBLES
INC NIBCOUNT ; mark that a nibble will be buffered for the next call
JSR GETSRC ; get 2 nibbles
STA NIBBLES ; buffer both nibbles
LSR ; shift the high nibble down into bits 0-3
LSR
LSR
LSR
SEC ; both exit paths return with the carry set
RTS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
GETPUT ; read one byte from the source, then store it at the destination
JSR GETSRC
PUTDST ; store A at the (self-modified) 24-bit destination address, then step it
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
LZSA_DST_BANK = *+3
STA $AAAAAA ; operand is patched: holds the current destination pointer
REP #$20 ; 16-bit accumulator
DEC PUTDST+1 ; decrement the 16-bit destination pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
GETLARGESRC ; read a 16-bit little-endian value: low byte in X, high byte in A
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC ; read one byte from the (self-modified) 24-bit source address into A
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LZSA_SRC_BANK = *+3
LDA $AAAAAA ; operand is patched: holds the current source pointer
REP #$20 ; 16-bit accumulator
DEC GETSRC+1 ; decrement the 16-bit source pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
} else {
; Forward decompression -- get and put bytes forward
GETPUT ; read one byte from the source, then store it at the destination
JSR GETSRC
PUTDST ; store A at the (self-modified) 24-bit destination address, then step it
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
LZSA_DST_BANK = *+3
STA $AAAAAA ; operand is patched: holds the current destination pointer
REP #$20 ; 16-bit accumulator
INC PUTDST+1 ; increment the 16-bit destination pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
GETLARGESRC ; read a 16-bit little-endian value: low byte in X, high byte in A
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC ; read one byte from the (self-modified) 24-bit source address into A
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LZSA_SRC_BANK = *+3
LDA $AAAAAA ; operand is patched: holds the current source pointer
REP #$20 ; 16-bit accumulator
INC GETSRC+1 ; increment the 16-bit source pointer (bank byte unchanged)
SEP #$20 ; back to 8-bit accumulator
RTS
}

90
asm/6809/unlzsa1-6309.s Normal file
View File

@ -0,0 +1,90 @@
; unlzsa1-6309.s - Hitachi 6309 decompression routine for raw LZSA1 - 92 bytes
; compress with lzsa -f1 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty, Doug Masten
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress raw LZSA1, using the 6309's W register and TFM block-copy instruction.
decompress_lzsa1 equ lz1token
lz1bigof lda ,x+ ; O set: load MSB 16-bit (negative, signed) offset
lz1gotof leau d,y ; put backreference start address in U (dst+offset)
ldd #$000f ; clear MSB match length and set mask for MMMM
andb ,s+ ; isolate MMMM (embedded match length) in token
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,x+ ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
tstb
bne lz1gotln ; go copy matched bytes if not
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,x+ ; grab low 8 bits of len in B
lz1gotln tfr d,w ; set W with match length for TFM instruction
tfm u+,y+ ; copy match bytes
lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,x+ ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,x+ ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (literals count is in bits 4-6 of the token)
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr d,w ; set W with literals count for TFM instruction
tfm x+,y+ ; copy literal bytes
lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
lda #$ff ; set high 8 bits
bra lz1gotof

102
asm/6809/unlzsa1.s Normal file
View File

@ -0,0 +1,102 @@
; unlzsa1.s - 6809 decompression routine for raw LZSA1 - 110 bytes
; compress with lzsa -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress raw LZSA1 on a plain 6809 (byte-by-byte copy loops, no TFM).
decompress_lzsa1 equ lz1token
lz1bigof lda ,x+ ; O set: load MSB 16-bit (negative, signed) offset
lz1gotof leau d,y ; put backreference start address in U (dst+offset)
ldd #$000f ; clear MSB match length and set mask for MMMM
andb ,s+ ; isolate MMMM (embedded match length) in token
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,x+ ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
tstb
bne lz1gotln ; go copy matched bytes if not
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,x+ ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,u+ ; copy matched byte
sta ,y+
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,x+ ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,x+ ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,x+ ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (literals count is in bits 4-6 of the token)
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt leau ,x ; copy literals source (X) into U for the copy loop
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,u+ ; copy literal byte
sta ,y+
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
leax ,u ; move the advanced source pointer back into X
lz1nolt ldb ,x+ ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
lda #$ff ; set high 8 bits
bra lz1gotof

92
asm/6809/unlzsa1b-6309.s Normal file
View File

@ -0,0 +1,92 @@
; unlzsa1b-6309.s - H6309 backward decompressor for raw LZSA1 - 97 bytes
; compress with lzsa -f1 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty, Doug Masten
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Backward-decompress raw LZSA1 using the 6309's TFM instruction (post-decrement).
decompress_lzsa1
leax 1,x ; pre-bias source so the first ,-x read hits the last byte
bra lz1token
lz1bigof ldd ,--x ; O set: load long 16-bit (negative, signed) offset
lz1gotof negd ; reverse sign of offset in D
leau d,y ; put backreference start address in U (dst+offset)
ldd #$000f ; clear MSB match length and set mask for MMMM
andb ,s+ ; isolate MMMM (embedded match length) in token
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
leay 1,y ; adjust pointer to first byte of decompressed data
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln tfr d,w ; set W with match length for TFM instruction
tfm u-,y- ; copy match bytes
lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,-x ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,-x ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (literals count is in bits 4-6 of the token)
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt tfr d,w ; set W with literals count for TFM instruction
leax -1,x ; tfm is post-decrement
tfm x-,y- ; copy literal bytes
leax 1,x ; undo the post-decrement bias
lz1nolt ldb ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
ldb ,-x ; load either 8-bit or LSB 16-bit offset (negative, signed)
lda #$ff ; set high 8 bits
bra lz1gotof

105
asm/6809/unlzsa1b.s Normal file
View File

@ -0,0 +1,105 @@
; unlzsa1b.s - 6809 backward decompression routine for raw LZSA1 - 113 bytes
; compress with lzsa -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Backward-decompress raw LZSA1 on a plain 6809 (byte-by-byte copy loops).
decompress_lzsa1
leax 1,x ; pre-bias source so the first ,-x read hits the last byte
leay 1,y ; pre-bias destination likewise
bra lz1token
lz1bigof ldd ,--x ; O set: load long 16 bit (negative, signed) offset
lz1gotof nega ; reverse sign of offset in D
negb
sbca #0 ; (16-bit two's complement: negate B, then fix up A)
leau d,y ; put backreference start address in U (dst+offset)
ldd #$000f ; clear MSB match length and set mask for MMMM
andb ,s+ ; isolate MMMM (embedded match length) in token
addb #$03 ; add MIN_MATCH_SIZE
cmpb #$12 ; MATCH_RUN_LEN?
bne lz1gotln ; no, we have the full match length, go copy
addb ,-x ; add extra match length byte + MIN_MATCH_SIZE + MATCH_RUN_LEN
bcc lz1gotln ; if no overflow, we have the full length
bne lz1midln
ldd ,--x ; load 16-bit len in D (low part in B, high in A)
bne lz1gotln ; check if we hit EOD (16-bit length = 0)
rts ; done, bail
lz1midln tfr b,a ; copy high part of len into A
ldb ,-x ; grab low 8 bits of len in B
lz1gotln pshs x ; save source compressed data pointer
tfr d,x ; copy match length to X
lz1cpymt lda ,-u ; copy matched byte
sta ,-y
leax -1,x ; decrement X
bne lz1cpymt ; loop until all matched bytes are copied
puls x ; restore source compressed data pointer
lz1token ldb ,-x ; load next token into B: O|LLL|MMMM
pshs b ; save it
andb #$70 ; isolate LLL (embedded literals count) in B
beq lz1nolt ; skip if no literals
cmpb #$70 ; LITERALS_RUN_LEN?
bne lz1declt ; if not, we have the complete count, go unshift
ldb ,-x ; load extra literals count byte
addb #$07 ; add LITERALS_RUN_LEN
bcc lz1gotla ; if no overflow, we got the complete count, copy
bne lz1midlt
ldd ,--x ; load 16 bit count in D (low part in B, high in A)
bra lz1gotlt ; we now have the complete count, go copy
lz1midlt tfr b,a ; copy high part of literals count into A
ldb ,-x ; load low 8 bits of literals count
bra lz1gotlt ; we now have the complete count, go copy
lz1declt lsrb ; shift literals count into place
lsrb ; (literals count is in bits 4-6 of the token)
lsrb
lsrb
lz1gotla clra ; clear A (high part of literals count)
lz1gotlt leau ,x ; copy literals source (X) into U for the copy loop
tfr d,x ; transfer 16-bit count into X
lz1cpylt lda ,-u ; copy literal byte
sta ,-y
leax -1,x ; decrement X and update Z flag
bne lz1cpylt ; loop until all literal bytes are copied
leax ,u ; move the rewound source pointer back into X
lz1nolt ldb ,s ; get token again, don't pop it from the stack
bmi lz1bigof ; test O bit (small or large offset)
ldb ,-x ; O clear: load 8 bit (negative, signed) offset
lda #$ff ; set high 8 bits
bra lz1gotof

129
asm/6809/unlzsa2-6309.s Normal file
View File

@ -0,0 +1,129 @@
; unlzsa2-6309.s - Hitachi 6309 decompression routine for raw LZSA2 - 150 bytes
; compress with lzsa -f2 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty, Doug Masten
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress raw LZSA2 using the 6309's W register and TFM block-copy instruction.
decompress_lzsa2
clr lz2nibct ; reset nibble available flag
bra lz2token
lz2nibct fcb 0 ; nibble ready flag
lz2replg lslb ; push token's Y flag bit into carry
bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
sex ; push token's Z flag bit into reg A
bsr lz2nibl ; get offset nibble in B
lsla ; push token's Z flag bit into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
tfr b,a ; copy bits 8-15 of offset into A
suba #$02 ; subtract 512 from offset
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match
ldd ,x++ ; load high then low 8 bits of offset
lz2gotof std lz2moff+2 ; store match offset (self-modifies the LEAU operand below)
lz2repof ldd #$0007 ; clear MSB match length and set mask for MMM
andb ,u ; isolate MMM (embedded match length) in token
lz2moff leau $aaaa,y ; put backreference start address in U (dst+offset)
addb #$02 ; add MIN_MATCH_SIZE_V2
cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
bne lz2gotln ; no, we have the full match length, go copy
bsr lz2nibl ; get offset nibble in B
addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
bne lz2gotln ; if not, we have the full match length, go copy
addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
bcc lz2gotln ; if no overflow, we have the full length
beq lz2done ; detect EOD code
ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
lda ,x+ ; (little endian)
lz2gotln tfr d,w ; set W with match count for TFM instruction
tfm u+,y+ ; copy match bytes
lz2token tfr x,u ; save token address
ldb ,x+ ; load next token into B: XYZ|LL|MMM
andb #$18 ; isolate LL (embedded literals count) in B
beq lz2nolt ; skip if no literals
cmpb #$18 ; LITERALS_RUN_LEN_V2?
bne lz2declt ; if not, we have the complete count, go unshift
bsr lz2nibl ; get extra literals length nibble in B
addb #$03 ; add LITERALS_RUN_LEN_V2
cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
bne lz2gotla ; if not, we have the full literals count, go copy
addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15
bcc lz2gotla ; if no overflow, we got the complete count, copy
ldb ,x+ ; load low 8 bits of little-endian literals count
lda ,x+ ; load high 8 bits of literal count
bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
lsrb ; (literals count is in bits 3-4 of the token)
lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr d,w ; set W with literals count for TFM instruction
tfm x+,y+ ; copy literal bytes
lz2nolt ldb ,u ; get token again
lslb ; push token's X flag bit into carry
bcs lz2replg ; if token's X bit is set, rep or large offset
lslb ; push token's Y flag bit into carry
sex ; push token's Z flag bit into reg A (carry flag is not effected)
bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
bsr lz2nibl ; get offset nibble in B
lsla ; retrieve token's Z flag bit and push into carry
rolb ; shift Z flag from carry into bit 0 of B
eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
sex ; set bits 8-15 of offset to $FF
bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
ldb ,x+ ; load low 8 bits of (negative, signed) offset
bra lz2gotof
lz2nibl ldb #$aa ; self-modified: operand holds the buffered nibble
com lz2nibct ; nibble ready?
bpl lz2gotnb
ldb ,x+ ; load two nibbles
stb lz2nibl+1 ; store nibble for next time (low 4 bits)
lsrb ; shift 4 high bits of nibble down
lsrb
lsrb
lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts

146
asm/6809/unlzsa2.s Normal file
View File

@ -0,0 +1,146 @@
; unlzsa2.s - 6809 decompression routine for raw LZSA2 - 169 bytes
; compress with lzsa -f2 -r <original_file> <compressed_file>
;
; in: x = start of compressed data
; y = start of decompression buffer
; out: y = end of decompression buffer + 1
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress a raw LZSA2 stream, forward direction, plain 6809 (no 6309 extensions).
; in:  x = start of compressed data, y = start of output buffer
; out: y = one past the last decompressed byte
; NOTE: self-modifying (patches lz2nibl+1 and lz2repof+2); not ROM-able, not re-entrant.
decompress_lzsa2
 clr <lz2nibct,pcr ; reset nibble available flag (no buffered nibble yet)
lz2token ldb ,x+ ; load next token into B: XYZ|LL|MMM
 pshs b ; save it (re-read at lz2nolt, popped later by andb ,s+)
 andb #$18 ; isolate LL (embedded literals count) in B
 beq lz2nolt ; skip if no literals
 cmpb #$18 ; LITERALS_RUN_LEN_V2 (3, in the LL field position)?
 bne lz2declt ; if not, we have the complete count, go unshift
 bsr lz2nibl ; get extra literals length nibble in B
 addb #$03 ; add LITERALS_RUN_LEN_V2
 cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
 bne lz2gotla ; if not, we have the full literals count, go copy
 addb ,x+ ; add extra literals count byte + LITERALS_RUN_LEN + 15
 bcc lz2gotla ; if no overflow, we got the complete count, copy
 ldb ,x+ ; overflow: a 16-bit little-endian count follows; low 8 bits
 lda ,x+ ; load high 8 bits of literal count
 bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place (down from bits 3-4)
 lsrb
 lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt leau ,x ; U = literals source (current compressed-data position)
 tfr d,x ; transfer 16-bit count into X (loop counter)
lz2cpylt lda ,u+ ; copy literal byte
 sta ,y+
 leax -1,x ; decrement X and update Z flag
 bne lz2cpylt ; loop until all literal bytes are copied
 leax ,u ; X = advanced compressed-data pointer
lz2nolt ldb ,s ; get token again, don't pop it from the stack
 lslb ; push token's X flag bit into carry
 bcs lz2replg ; if token's X bit is set, rep or large offset
 lslb ; push token's Y flag bit into carry
 sex ; push token's Z flag bit into reg A (carry flag is not affected)
 bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
 bsr lz2nibl ; get offset nibble in B
 lsla ; retrieve token's Z flag bit and push into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
 sex ; set bits 8-15 of offset to $FF (offset is negative)
 bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
 ldb ,x+ ; load low 8 bits of (negative, signed) offset
 bra lz2gotof
lz2nibct fcb $00 ; nibble ready flag ($00 = empty, $FF = one nibble buffered)
lz2nibl ldb #$aa ; $aa is a placeholder; patched below with the buffered low nibble
 com <lz2nibct,pcr ; toggle nibble ready flag; was a nibble buffered?
 bpl lz2gotnb ; yes, return the buffered one (just loaded into B)
 ldb ,x+ ; load two nibbles
 stb <lz2nibl+1,pcr ; self-modify: store nibble for next time (low 4 bits)
 lsrb ; shift 4 high bits of nibble down
 lsrb
 lsrb
 lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
 bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
 sex ; push token's Z flag bit into reg A
 bsr lz2nibl ; get offset nibble in B
 lsla ; push token's Z flag bit into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
 tfr b,a ; copy bits 8-15 of offset into A
 suba #$02 ; subtract 512 from offset
 ldb ,x+ ; load low 8 bits of (negative, signed) offset
 bra lz2gotof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match (reuse stored offset)
 ldd ,x++ ; load high then low 8 bits of offset
lz2gotof std <lz2repof+2,pcr ; self-modify: store match offset into the leau displacement below
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset; $aaaa is patched)
 ldd #$0007 ; clear MSB match length and set mask for MMM
 andb ,s+ ; isolate MMM (embedded match length) in token, popping it
 addb #$02 ; add MIN_MATCH_SIZE_V2
 cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
 bne lz2gotln ; no, we have the full match length, go copy
 bsr lz2nibl ; get extra match length nibble in B
 addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
 cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
 bne lz2gotln ; if not, we have the full match length, go copy
 addb ,x+ ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
 bcc lz2gotln ; if no overflow, we have the full length
 beq lz2done ; carry set and result zero: EOD code, exit
 ldb ,x+ ; load 16-bit len in D (low part in B, high in A)
 lda ,x+ ; (little endian)
lz2gotln pshs x ; save source compressed data pointer
 tfr d,x ; copy match length to X
lz2cpymt lda ,u+ ; copy matched byte
 sta ,y+
 leax -1,x ; decrement X
 bne lz2cpymt ; loop until all matched bytes are copied
 puls x ; restore source compressed data pointer
 lbra lz2token ; go decode next token

133
asm/6809/unlzsa2b-6309.s Normal file
View File

@ -0,0 +1,133 @@
; unlzsa2b-6309.s - H6309 backward decompressor for raw LZSA2 - 155 bytes
; compress with lzsa -f2 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty, Doug Masten
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress a raw LZSA2 stream backward (H6309: uses the W register, TFM and NEGD).
; in:  x = last byte of compressed data, y = last byte of output buffer
; out: y = first byte of decompressed data
; NOTE: self-modifying (patches lz2moff+2 and lz2gotnb+1); not ROM-able, not re-entrant.
decompress_lzsa2
 clr lz2nibct ; reset nibble available flag (no buffered nibble yet)
 leax 1,x ; adjust compressed data pointer (stream is read with pre-decrement)
 bra lz2token
lz2nibct fcb 0 ; nibble ready flag ($00 = empty, $FF = one nibble buffered)
lz2replg lslb ; push token's Y flag bit into carry
 bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
 sex ; push token's Z flag bit into reg A
 bsr lz2nibl ; get offset nibble in B
 lsla ; push token's Z flag bit into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
 tfr b,a ; copy bits 8-15 of offset into A
 suba #$02 ; subtract 512 from offset
 bra lz2lowof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match (reuse stored offset)
 lda ,-x ; load high 8 bits of (negative, signed) offset
lz2lowof ldb ,-x ; load low 8 bits of offset
lz2gotof negd ; reverse sign of offset in D (6309 NEGD)
 std lz2moff+2 ; self-modify: store match offset into the leau at lz2moff
lz2repof ldd #$0007 ; clear MSB match length and set mask for MMM
 andb ,u ; isolate MMM (embedded match length) in token
lz2moff leau $aaaa,y ; put backreference start address in U (dst+offset; $aaaa is patched)
 addb #$02 ; add MIN_MATCH_SIZE_V2
 cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
 bne lz2gotln ; no, we have the full match length, go copy
 bsr lz2nibl ; get extra match length nibble in B
 addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
 cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
 bne lz2gotln ; if not, we have the full match length, go copy
 addb ,-x ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
 bcc lz2gotln ; if no overflow, we have the full length
 beq lz2done ; carry set and result zero: EOD code, exit
 ldd ,--x ; load 16-bit len in D (low part in B, high in A)
lz2gotln tfr d,w ; set W with match count for TFM instruction
 tfm u-,y- ; copy match bytes backward
lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
 tfr x,u ; save token address (re-read at lz2nolt and lz2repof)
 andb #$18 ; isolate LL (embedded literals count) in B
 beq lz2nolt ; skip if no literals
 cmpb #$18 ; LITERALS_RUN_LEN_V2?
 bne lz2declt ; if not, we have the complete count, go unshift
 bsr lz2nibl ; get extra literals length nibble in B
 addb #$03 ; add LITERALS_RUN_LEN_V2
 cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
 bne lz2gotla ; if not, we have the full literals count, go copy
 addb ,-x ; add extra literals count byte + LITERALS_RUN_LEN + 15
 bcc lz2gotla ; if no overflow, we got the complete count, copy
 ldd ,--x ; load 16 bit count in D (low part in B, high in A)
 bra lz2gotlt ; we now have the complete count, go copy
lz2nibl com lz2nibct ; toggle nibble ready flag; was a nibble buffered?
 bpl lz2gotnb ; yes, go return the buffered one
 ldb ,-x ; load two nibbles
 stb lz2gotnb+1 ; self-modify: store nibble for next time (low 4 bits)
 lsrb ; shift 4 high bits of nibble down
 lsrb
 lsrb
 lsrb
 rts ; return high nibble (already cleared of high bits by the shifts)
lz2declt lsrb ; shift literals count into place
 lsrb
 lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt tfr d,w ; set W with literals count for TFM instruction
 leax -1,x ; tfm is post-decrement
 tfm x-,y- ; copy literal bytes backward
 leax 1,x ; undo the pre-copy pointer adjustment
lz2nolt ldb ,u ; get token again
 lslb ; push token's X flag bit into carry
 bcs lz2replg ; if token's X bit is set, rep or large offset
 lslb ; push token's Y flag bit into carry
 sex ; push token's Z flag bit into reg A (carry flag is not affected)
 bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
 bsr lz2nibl ; get offset nibble in B
 lsla ; retrieve token's Z flag bit and push into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
 sex ; set bits 8-15 of offset to $FF (offset is negative)
 bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
 bra lz2lowof
lz2done leay 1,y ; adjust pointer to first byte of decompressed data and then exit
lz2gotnb ldb #$aa ; load nibble ($aa is a placeholder patched by lz2nibl)
 andb #$0f ; only keep low 4 bits
 rts

152
asm/6809/unlzsa2b.s Normal file
View File

@ -0,0 +1,152 @@
; unlzsa2b.s - 6809 backward decompression routine for raw LZSA2 - 171 bytes
; compress with lzsa -f2 -r -b <original_file> <compressed_file>
;
; in: x = last byte of compressed data
; y = last byte of decompression buffer
; out: y = first byte of decompressed data
;
; Copyright (C) 2020 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Decompress a raw LZSA2 stream backward, plain 6809 (no 6309 extensions).
; in:  x = last byte of compressed data, y = last byte of output buffer
; out: y = first byte of decompressed data
; NOTE: self-modifying (patches lz2nibl+1 and lz2repof+2); not ROM-able, not re-entrant.
decompress_lzsa2
 clr <lz2nibct,pcr ; reset nibble available flag (no buffered nibble yet)
 leax 1,x ; adjust pointers: both streams are accessed with pre-decrement
 leay 1,y
lz2token ldb ,-x ; load next token into B: XYZ|LL|MMM
 pshs b ; save it (re-read at lz2nolt, popped later by andb ,s+)
 andb #$18 ; isolate LL (embedded literals count) in B
 beq lz2nolt ; skip if no literals
 cmpb #$18 ; LITERALS_RUN_LEN_V2?
 bne lz2declt ; if not, we have the complete count, go unshift
 bsr lz2nibl ; get extra literals length nibble in B
 addb #$03 ; add LITERALS_RUN_LEN_V2
 cmpb #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
 bne lz2gotla ; if not, we have the full literals count, go copy
 addb ,-x ; add extra literals count byte + LITERALS_RUN_LEN + 15
 bcc lz2gotla ; if no overflow, we got the complete count, copy
 ldd ,--x ; load 16 bit count in D (low part in B, high in A)
 bra lz2gotlt ; we now have the complete count, go copy
lz2declt lsrb ; shift literals count into place
 lsrb
 lsrb
lz2gotla clra ; clear A (high part of literals count)
lz2gotlt leau ,x ; U = literals source (current compressed-data position)
 tfr d,x ; transfer 16-bit count into X (loop counter)
lz2cpylt lda ,-u ; copy literal byte backward
 sta ,-y
 leax -1,x ; decrement X and update Z flag
 bne lz2cpylt ; loop until all literal bytes are copied
 leax ,u ; X = updated compressed-data pointer
lz2nolt ldb ,s ; get token again, don't pop it from the stack
 lslb ; push token's X flag bit into carry
 bcs lz2replg ; if token's X bit is set, rep or large offset
 lslb ; push token's Y flag bit into carry
 sex ; push token's Z flag bit into reg A (carry flag is not affected)
 bcs lz2offs9 ; if token's Y bit is set, 9 bits offset
 bsr lz2nibl ; get offset nibble in B
 lsla ; retrieve token's Z flag bit and push into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 5-7 of offset, reverse bit 0
 sex ; set bits 8-15 of offset to $FF (offset is negative)
 bra lz2gotof
lz2offs9 deca ; set bits 9-15 of offset, reverse bit 8
 bra lz2lowof
lz2nibct fcb $00 ; nibble ready flag ($00 = empty, $FF = one nibble buffered)
lz2nibl ldb #$aa ; $aa is a placeholder; patched below with the buffered low nibble
 com <lz2nibct,pcr ; toggle nibble ready flag; was a nibble buffered?
 bpl lz2gotnb ; yes, return the buffered one (just loaded into B)
 ldb ,-x ; load two nibbles
 stb <lz2nibl+1,pcr ; self-modify: store nibble for next time (low 4 bits)
 lsrb ; shift 4 high bits of nibble down
 lsrb
 lsrb
 lsrb
lz2gotnb andb #$0f ; only keep low 4 bits
lz2done rts
lz2replg lslb ; push token's Y flag bit into carry
 bcs lz2rep16 ; if token's Y bit is set, rep or 16 bit offset
 sex ; push token's Z flag bit into reg A
 bsr lz2nibl ; get offset nibble in B
 lsla ; retrieve token's Z flag bit and push into carry
 rolb ; shift Z flag from carry into bit 0 of B
 eorb #$e1 ; set bits 13-15 of offset, reverse bit 8
 tfr b,a ; copy bits 8-15 of offset into A
 suba #$02 ; subtract 512 from offset
 bra lz2lowof
lz2rep16 bmi lz2repof ; if token's Z flag bit is set, rep match (reuse stored offset)
 lda ,-x ; load high 8 bits of (negative, signed) offset
lz2lowof ldb ,-x ; load low 8 bits of offset
lz2gotof nega ; reverse sign of 16-bit offset in D:
 negb ; negate both bytes, then
 sbca #0 ; propagate the borrow from the low-byte negate
 std <lz2repof+2,pcr ; self-modify: store match offset into the leau displacement below
lz2repof leau $aaaa,y ; put backreference start address in U (dst+offset; $aaaa is patched)
 ldd #$0007 ; clear MSB match length and set mask for MMM
 andb ,s+ ; isolate MMM (embedded match length) in token, popping it
 addb #$02 ; add MIN_MATCH_SIZE_V2
 cmpb #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
 bne lz2gotln ; no, we have the full match length, go copy
 bsr lz2nibl ; get extra match length nibble in B
 addb #$09 ; add MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2
 cmpb #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
 bne lz2gotln ; if not, we have the full match length, go copy
 addb ,-x ; add extra length byte + MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15
 bcc lz2gotln ; if no overflow, we have the full length
 beq lz2done ; carry set and result zero: EOD code, exit
 ldd ,--x ; load 16-bit len in D (low part in B, high in A)
lz2gotln pshs x ; save source compressed data pointer
 tfr d,x ; copy match length to X
lz2cpymt lda ,-u ; copy matched byte backward
 sta ,-y
 leax -1,x ; decrement X
 bne lz2cpymt ; loop until all matched bytes are copied
 puls x ; restore source compressed data pointer
 lbra lz2token ; go decode next token

View File

@ -1,32 +1,135 @@
; lzsa1fta.asm time-efficient decompressor implementation for 8086 CPUs.
; Turbo Assembler IDEAL mode dialect; can also be assembled with NASM.
; lzsa2fta.asm time-efficient decompressor implementation for 808x CPUs.
; Turbo Assembler IDEAL mode dialect.
; (Is supposed to also assemble with NASM's IDEAL mode support, but YMMV.)
;
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
; This code assembles to about 3K of lookup tables and unrolled code,
; but the tradeoff for that size is the absolute fastest decompressor
; of LZSA1 block data for 808x CPUs.
; If you need moderately fast code with less size, see LZSA1FTA.ASM.
; If you need the smallest decompression code, see decompress_small_v1.S.
;
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
; Usual DOS assembler SMALL model assumptions apply. This code:
; - Assumes it was invoked via NEAR call (change RET to RETF for FAR calls)
; - Is interrupt-safe
; - Is not re-entrant (do not decompress while already running decompression)
; - Trashes all data and segment registers
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
; Copyright (C) 2019 Jim Leonard, Emmanuel Marty
; Additional speed optimizations by Pavel Zagrebin
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; ===========================================================================
;
; The key area to concentrate on when optimizing LZSA1 decompression speed is
; reducing time spent handling the shortest matches. This is for two reasons:
; 1. shorter matches are more common
; 2. short matches are least efficient in terms of decomp speed per byte
; You can confirm #1 using the --stats mode of the compressor.
;
; Branches are costly on 8086. To ensure we branch as little as possible, a
; jumptable will be used to branch directly to as many direct decode paths as
; possible. This will burn up 512 bytes of RAM for a jumptable, and a few
; hundred bytes of duplicated program code (rather than JMP/CALL common code
; blocks, we inline them to avoid the branch overhead).
;
; ===========================================================================
;
; === LZSA1 block reference:
;
; Blocks encoded as LZSA1 are composed from consecutive commands.
; Each command follows this format:
;
; token: <O|LLL|MMMM>
; optional extra literal length
; literal values
; match offset low
; optional match offset high
; optional extra encoded match length
;
;
; === LZSA1 Token Reference:
;
; 7 6 5 4 3 2 1 0
; O L L L M M M M
;
; L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for
; this command is 0 to 6, the length is encoded in the token and no extra bytes
; are required. Otherwise, a value of 7 is encoded and extra bytes follow as
; 'optional extra literal length'
;
; M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the
; encoded match length for this command is 0 to 14, it is directly stored,
; otherwise 15 is stored and extra bytes follow as 'optional extra encoded match
; length'. Except for the last command in a block, a command always contains a
; match, so the encoded match length is the actual match length, offset by the
; minimum which is 3 bytes. For instance, an actual match length of 10 bytes to
; be copied, is encoded as 7.
;
; O: set for a 2-bytes match offset, clear for a 1-byte match offset
;
;
; === Decoding extended literal length:
;
; If the literals length is 7 or more, then an extra byte follows here, with
; three possible values:
;
; 0-248: the value is added to the 7 stored in the token.
; 250: a second byte follows. The final literals value is 256 + the second byte.
; 249: a little-endian 16-bit value follows, forming the final literals value.
;
;
; === Decoding match offsets:
;
; match offset low: The low 8 bits of the match offset follows.
;
; optional match offset high: If the 'O' bit (bit 7) is set in the token, the
; high 8 bits of the match offset follow, otherwise they are understood to be all
; set to 1. For instance, a short offset of 0x70 is interpreted as 0xff70
;
;
; === Decoding extra encoded match length:
;
; optional extra encoded match length: If the encoded match length is 15 or more,
; the 'M' bits in the token form the value 15, and an extra byte follows here,
; with three possible types of value.
;
; 0-237: the value is added to the 15 stored in the token.
; The final value is 3 + 15 + this byte.
; 239: a second byte follows. The final match length is 256 + the second byte.
; 238: a second and third byte follow, forming a little-endian 16-bit value.
; The final encoded match length is that 16-bit value.
;
; ===========================================================================
IDEAL
P8086
IDEAL ; Use Turbo Assembler IDEAL syntax checking
P8086 ; Restrict code generation to the 808x and later
JUMPS ; Perform fixups for out-of-bound conditional jumps
; This is required for the (L=07 & M=0Fh) decode paths as they
; have the most code, but these are uncommon paths so the
; tiny speed loss in just these paths is not a concern.
;Setting OPTIMIZE_LONG_RLE to 1 speeds up decompressing long runs of the
;same 16-bit word value, but hurts decompression speed of other data
;types slightly. Turn this on if you know your data has very long 16-bit
;word-based runs (reported as RLE2 sequences in the LZSA compressor output
;with an average length of at least 32 bytes), otherwise leave it off.
OPTIMIZE_LONG_RLE EQU 0
SEGMENT CODE para public
@ -34,241 +137,445 @@ ASSUME cs:CODE, ds:CODE
PUBLIC lzsa1_decompress_speed_jumptable
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
; EQU helper statements (so we can construct a jump table without going crazy)
;Jump table for handling LLL bits in initial LZSA1 tokens.
;Previous code would SHR val,4 to get a count from 0 to 7, then rep movsb.
;We can overload the shift operation into a jump table that jumps directly
;to optimized copying routine for 0-7 bytes. Must declare in code segment.
;Note: If this looks strange for declaring a jump table, that's because it
;is a workaround for the Turbo Pascal harness that tests it. Turbo Pascal
;treats OFFSET (label) as a relocatable item and throws an error, so we fool
;it by building the table with absolute EQU/literals instead.
L0b EQU OFFSET check_offset_size
L1b EQU OFFSET copy1b
L2b EQU OFFSET copy2b
L3b EQU OFFSET copy3b
L4b EQU OFFSET copy4b
L5b EQU OFFSET copy5b
L6b EQU OFFSET copy6b
L7b EQU OFFSET need_length_byte
copytable DW L0b,L0b,L0b,L0b,L0b,L0b,L0b,L0b
DW L1b,L1b,L1b,L1b,L1b,L1b,L1b,L1b
DW L2b,L2b,L2b,L2b,L2b,L2b,L2b,L2b
DW L3b,L3b,L3b,L3b,L3b,L3b,L3b,L3b
DW L4b,L4b,L4b,L4b,L4b,L4b,L4b,L4b
DW L5b,L5b,L5b,L5b,L5b,L5b,L5b,L5b
DW L6b,L6b,L6b,L6b,L6b,L6b,L6b,L6b
DW L7b,L7b,L7b,L7b,L7b,L7b,L7b,L7b
minmatch EQU 3
litrunlen EQU 7
leml1 EQU OFFSET lit_ext_mat_len_1b
leme1 EQU OFFSET lit_ext_mat_ext_1b
leml2 EQU OFFSET lit_ext_mat_len_2b
leme2 EQU OFFSET lit_ext_mat_ext_2b
;short-circuit special cases for 0 through 6 literal copies:
l6ml1 EQU OFFSET lit_len_mat_len_1b_6
l6me1 EQU OFFSET lit_len_mat_ext_1b
l6ml2 EQU OFFSET lit_len_mat_len_2b_6
l6me2 EQU OFFSET lit_len_mat_ext_2b
l5ml1 EQU OFFSET lit_len_mat_len_1b_45
l5me1 EQU OFFSET lit_len_mat_ext_1b + 1
l5ml2 EQU OFFSET lit_len_mat_len_2b_45
l5me2 EQU OFFSET lit_len_mat_ext_2b + 1
l4ml1 EQU OFFSET lit_len_mat_len_1b_45 + 1
l4me1 EQU OFFSET lit_len_mat_ext_1b + 2
l4ml2 EQU OFFSET lit_len_mat_len_2b_45 + 1
l4me2 EQU OFFSET lit_len_mat_ext_2b + 2
l3ml1 EQU OFFSET lit_len_mat_len_1b_23
l3me1 EQU OFFSET lit_len_mat_ext_1b + 3
l3ml2 EQU OFFSET lit_len_mat_len_2b_23
l3me2 EQU OFFSET lit_len_mat_ext_2b + 3
l2ml1 EQU OFFSET lit_len_mat_len_1b_23 + 1
l2me1 EQU OFFSET lit_len_mat_ext_1b + 4
l2ml2 EQU OFFSET lit_len_mat_len_2b_23 + 1
l2me2 EQU OFFSET lit_len_mat_ext_2b + 4
l1ml1 EQU OFFSET lit_len_mat_len_1b_01
l1me1 EQU OFFSET lit_len_mat_ext_1b + 5
l1ml2 EQU OFFSET lit_len_mat_len_2b_01
l1me2 EQU OFFSET lit_len_mat_ext_2b + 5
l0ml1 EQU OFFSET lit_len_mat_len_1b_01 + 1 ; MMMM handling comes after LLL code
l0me1 EQU OFFSET lit_len_mat_ext_1b + 6 ; MMMM handling comes after LLL code
l0ml2 EQU OFFSET lit_len_mat_len_2b_01 + 1 ; MMMM handling comes after LLL code
l0me2 EQU OFFSET lit_len_mat_ext_2b + 6 ; MMMM handling comes after LLL code
; 0 1 2 3 4 5 6 7 8 9 a b c d e f
jtbl DW l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0ml1,l0me1 ;0
DW l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1ml1,l1me1 ;1
DW l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2ml1,l2me1 ;2
DW l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3ml1,l3me1 ;3
DW l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4ml1,l4me1 ;4
DW l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5ml1,l5me1 ;5
DW l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6ml1,l6me1 ;6
DW leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leml1,leme1 ;7
DW l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0ml2,l0me2 ;8
DW l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1ml2,l1me2 ;9
DW l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2ml2,l2me2 ;a
DW l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3ml2,l3me2 ;b
DW l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4ml2,l4me2 ;c
DW l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5ml2,l5me2 ;d
DW l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6ml2,l6me2 ;e
DW leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leml2,leme2 ;f
PROC lzsa1_decompress_speed_jumptable NEAR
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
MACRO get_byte_match_offset
mov ah,0ffh ;O=0, so set up offset's high byte
lodsb ;load low byte; ax=match offset
xchg bp,ax ;bp=match offset ax=00 + original token
ENDM
MACRO get_word_match_offset
lodsw ;ax=match offset
xchg bp,ax ;bp=match offset ax=00 + original token
ENDM
MACRO do_match_copy_long
LOCAL even0,even1,even2,do_run,do_run_w
; Copies a long match as optimally as possible.
; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
; trashes: ax, bx
; must leave cx=0 at exit
mov bx,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
lea si,[bp+di] ;si = output buffer + negative match offset
cmp bp,-2 ;do we have a byte/word run to optimize?
IF OPTIMIZE_LONG_RLE
jae do_run ;catch offset = -2 or -1
ELSE
ja do_run ;catch offset = -1
ENDIF
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in @@do_run) So, let's copy faster with REP MOVSW.
;This affects 8088 only slightly, but is a bigger win on 8086 and higher.
shr cx,1
jnc even0
movsb
even0:
rep movsw
xchg si,ax ;restore si
mov ds,bx ;restore ds
jmp decode_token
do_run:
IF OPTIMIZE_LONG_RLE
je do_run_w ;if applicable, handle word-sized value faster
ENDIF
xchg dx,ax ;save si into dx, as ax is getting trashed
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
jnc even1
stosb
even1:
rep stosw ;perform word run
mov si,dx ;restore si
mov ds,bx ;restore ds
jmp decode_token
IF OPTIMIZE_LONG_RLE
do_run_w:
xchg dx,ax ;save si into dx, as ax is getting trashed
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
jnc even2
stosb ;should be after rep stosw!
even2:
mov si,dx ;restore si
mov ds,bx ;restore ds
jmp decode_token
ENDIF
ENDM
MACRO do_match_copy
; Copies a shorter match with as little overhead as possible.
; requirements: cx=length, bp=negative offset, ds:si=compdata, es:di=output
; trashes: ax, bx
; must leave cx=0 at exit
mov bx,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
lea si,[bp+di] ;si = output buffer + negative match offset
movsb
movsb
movsb ;Handle MINMATCH (instead of add cx,MINMATCH)
rep movsb
xchg si,ax ;restore si
mov ds,bx ;restore ds
jmp decode_token
ENDM
MACRO do_literal_copy
LOCAL even
; Copies a literal sequence using words.
; Meant for longer lengths; for 128 bytes or less, use REP MOVSB.
; requirements: cx=length, ds:si=compdata, es:di=output
; must leave cx=0 at exit
shr cx,1
jnc even
movsb
even:
rep movsw
ENDM
MACRO copy_small_match_len
and al,0FH ;isolate length in token (MMMM)
xchg cx,ax ;cx=match length
do_match_copy ;copy match with cx=length, bp=offset
ENDM
MACRO copy_large_match_len
LOCAL val239,val238,EOD
; Handle MMMM=Fh
; Assumptions: ah=0 from get_????_match_offset's xchg
lodsb ;grab extra match length byte
add al,0Fh+minmatch ;add MATCH_RUN_LEN + MIN_MATCH_SIZE
; jz val238 ;if zf & cf, 238: get 16-bit match length
jc val239 ;if cf, 239: get extra match length byte
xchg cx,ax ;otherwise, we have our match length
do_match_copy_long ;copy match with cx=length, bp=offset
val239:
jz val238
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_match_copy_long ;copy match with cx=length, bp=offset
val238:
lodsw ;grab 16-bit length
xchg cx,ax
jcxz EOD ;is it the EOD marker? Exit if so
do_match_copy_long ;copy match with cx=length, bp=offset
EOD:
jmp done_decompressing
ENDM
lzsa1_start:
push di ;remember decompression offset
cld ;ensure string ops move forward
xor cx,cx
@@decode_token:
xchg cx,ax ;clear ah (cx = 0 from match copy's rep movsb)
decode_token:
xchg cx,ax ;clear ah (cx = 0 from match copy's REP)
lodsb ;read token byte: O|LLL|MMMM
mov dx,ax ;copy our token to dl for later MMMM handling
mov bp,ax ;preserve 0+token in bp for later MMMM handling
mov bx,ax ;prep for table lookup
shl bx,1 ;adjust for offset word size
jmp [cs:jtbl+bx] ;jump directly to relevant decode path
and al,070H ;isolate literals length in token (LLL)
jz check_offset_size ;if LLL=0, we have no literals; goto match
; There are eight basic decode paths for an LZSA1 token. Each of these
; paths perform only the necessary actions to decode the token and then
; fetch the next token. This results in a lot of code duplication, but
; it is the only way to get down to two branches per token (jump to unique
; decode path, then jump back to next token) for the most common cases.
; Jump to short copy routine for LLL=1 though 6, need_length_byte for LLL=7
mov bx,ax ;prep for table lookup (must copy, don't XCHG!)
jmp [cs:copytable+bx]
; Path #1: LLL=0-6, MMMM=0-Eh, O=0 (1-byte match offset)
; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
lit_len_mat_len_1b_01:
movsb
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_23:
movsb
movsw
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_45:
movsb
movsw
movsw
get_byte_match_offset
copy_small_match_len
lit_len_mat_len_1b_6:
movsw
movsw
movsw
get_byte_match_offset
copy_small_match_len
need_length_byte:
lodsb ;grab extra length byte
add al,07H ;add LITERALS_RUN_LEN
jnc @@got_literals_exact ;if no overflow, we have full count
je @@big_literals
; Path #2: LLL=0-6, MMMM=Fh, O=0 (1-byte match offset)
lit_len_mat_ext_1b:
movsb
movsb
movsb
movsb
movsb
movsb
get_byte_match_offset
copy_large_match_len
@@mid_literals:
lodsb ;grab single extra length byte
inc ah ;add 256
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw ;We don't need to account for overlap because
adc cx,0 ;source for literals isn't the output buffer.
rep movsb
jmp check_offset_size
@@big_literals:
lodsw ;grab 16-bit extra length
xchg cx,ax ;with longer counts, we can save some time
shr cx,1 ;by doing a word copy instead of a byte copy.
rep movsw
adc cx,0
rep movsb
jmp check_offset_size
; Used for counts 7-248. In test data, average value around 1Ah. YMMV.
@@got_literals_exact:
; Path #3: LLL=7, MMMM=0-Eh, O=0 (1-byte match offset)
lit_ext_mat_len_1b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_3 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_3 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_byte_match_offset
copy_small_match_len
@@val250_3:
jz @@val249_3
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
rep movsb ;copy cx literals from ds:si to es:di
jmp check_offset_size
;Literal copy sequence for lengths 1-6:
copy6b: movsb
copy5b: movsb
copy4b: movsb
copy3b: movsb
copy2b: movsb
copy1b: movsb
;Literals done; fall through to match offset determination
check_offset_size:
test dl,dl ;check match offset size in token (O bit)
js @@get_long_offset ;load absolute 16-bit match offset
mov ah,0ffh ;set up high byte
lodsb ;load low byte
@@get_match_length:
xchg dx,ax ;dx: match offset ax: original token
and al,0FH ;isolate match length in token (MMMM)
cmp al,0FH ;MATCH_RUN_LEN?
jne @@got_matchlen_short ;no, we have the full match length from the token, go copy
lodsb ;grab extra length byte
add al,012H ;add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc @@do_long_copy ;if no overflow, we have the entire length
jne @@mid_matchlen
do_literal_copy
get_byte_match_offset
copy_small_match_len
@@val249_3:
lodsw ;grab 16-bit length
xchg cx,ax ;get ready to do a long copy
jcxz @@done_decompressing ;wait, is it the EOD marker? Exit if so
jmp @@copy_len_preset ;otherwise, do the copy
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_small_match_len
@@got_matchlen_short:
add al,3 ;add MIN_MATCH_SIZE
xchg cx,ax ;copy match length into cx
mov bp,ds ;save ds
mov ax,es
mov ds,ax ;ds=es
xchg ax,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
rep movsb ;copy match
xchg si,ax ;restore si
mov ds,bp ;restore ds
jmp @@decode_token ;go decode another token
@@done_decompressing:
; Path #4: LLL=7, MMMM=Fh, O=0 (1-byte match offset)
lit_ext_mat_ext_1b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_4 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_4 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_byte_match_offset
copy_large_match_len
@@val250_4:
jz @@val249_4
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_large_match_len
@@val249_4:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_byte_match_offset
copy_large_match_len
; Path #5: LLL=0-6, MMMM=0-Eh, O=1 (2-byte match offset)
; Handle LLL=0-6 by jumping directly into # of bytes to copy (6 down to 1)
lit_len_mat_len_2b_01:
movsb
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_23:
movsb
movsw
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_45:
movsb
movsw
movsw
get_word_match_offset
copy_small_match_len
lit_len_mat_len_2b_6:
movsw
movsw
movsw
get_word_match_offset
copy_small_match_len
; Path #6: LLL=0-6, MMMM=Fh, O=1 (2-byte match offset)
; Path #6: LLL=0-6, MMMM=Fh, O=1 (2-byte match offset)
lit_len_mat_ext_2b:
movsb
movsb
movsb
movsb
movsb
movsb
get_word_match_offset
copy_large_match_len
; Path #7: LLL=7, MMMM=0-Eh, O=1 (2-byte match offset)
lit_ext_mat_len_2b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_7 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_7 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_word_match_offset
copy_small_match_len
@@val250_7:
jz @@val249_7
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_small_match_len
@@val249_7:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_small_match_len
; Path #8: LLL=7, MMMM=Fh, O=1 (2-byte match offset)
lit_ext_mat_ext_2b:
; on entry: ax=0 + token, bp=ax
lodsb ;grab extra literal length byte
add al,litrunlen ;add 7h literal run length
; jz @@val249_8 ;if zf & cf, 249: get 16-bit literal length
jc @@val250_8 ;if cf, 250: get extra literal length byte
xchg cx,ax ;otherwise, we have our literal length
do_literal_copy ;this might be better as rep movsw !!! benchmark
get_word_match_offset
copy_large_match_len
@@val250_8:
jz @@val249_8
lodsb ;ah=0; grab single extra length byte
inc ah ;ax=256+length byte
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_large_match_len
@@val249_8:
lodsw ;grab 16-bit length
xchg cx,ax
do_literal_copy
get_word_match_offset
copy_large_match_len
done_decompressing:
;return # of decompressed bytes in ax
pop ax ;retrieve the original decompression offset
xchg di,ax ;compute decompressed size
sub ax,di
sub di,ax ;adjust for original offset
xchg di,ax ;return adjusted value in ax
ret ;done decompressing, exit to caller
;These are called less often; moved here to optimize the fall-through case
@@get_long_offset:
lodsw ;Get 2-byte match offset
jmp @@get_match_length
;With a confirmed longer match length, we have an opportunity to optimize for
;the case where a single byte is repeated long enough that we can benefit
;from rep movsw to perform the run (instead of rep movsb).
@@mid_matchlen:
lodsb ;grab single extra length byte
inc ah ;add 256
@@do_long_copy:
xchg cx,ax ;copy match length into cx
@@copy_len_preset:
push ds ;save ds
mov bp,es
mov ds,bp ;ds=es
mov bp,si ;save si
mov si,di ;ds:si now points at back reference in output data
add si,dx
cmp dx,-2 ;do we have a byte/word run to optimize?
jae @@do_run ;perform a run
;You may be tempted to change "jae" to "jge" because DX is a signed number.
;Don't! The total window is 64k, so if you treat this as a signed comparison,
;you will get incorrect results for offsets over 32K.
;If we're here, we have a long copy and it isn't byte-overlapping (if it
;overlapped, we'd be in @@do_run_1) So, let's copy faster with REP MOVSW.
;This won't affect 8088 that much, but it speeds up 8086 and higher.
shr cx,1
rep movsw
adc cx,0
rep movsb
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
@@do_run:
je @@do_run_2 ;fall through to byte (common) if not word run
@@do_run_1:
lodsb ;load first byte of run into al
mov ah,al
shr cx,1
rep stosw ;perform word run
adc cx,0
rep stosb ;finish word run
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
@@do_run_2:
lodsw ;load first word of run
shr cx,1
rep stosw ;perform word run
adc cx,0 ;despite 2-byte offset, compressor might
rep stosb ;output odd length. better safe than sorry.
mov si,bp ;restore si
pop ds
jmp @@decode_token ;go decode another token
ENDP lzsa1_decompress_speed_jumptable
ENDS CODE
END
;Speed optimization history (decompression times in microseconds @ 4.77 MHz):
; original E. Marty code shuttle 123208 alice 65660 robotron 407338 ***
; table for shr al,4 shuttle 120964 alice 63230 robotron 394733 +++
; push/pop to mov/mov shuttle 118176 alice 61835 robotron 386762 +++
; movsw for literalcpys shuttle 124102 alice 64908 robotron 400220 --- rb
; stosw for byte runs shuttle 118897 alice 65040 robotron 403518 --- rb
; better stosw for runs shuttle 117712 alice 65040 robotron 403343 +--
; disable RLE by default shuttle 116924 alice 60783 robotron 381226 +++
; optimize got_matchlen shuttle 115294 alice 59588 robotron 374330 +++
; fall through to getML shuttle 113258 alice 59572 robotron 372004 +++
; fall through to midLI shuttle 113258 alice 59572 robotron 375060 ..- rb
; fall through midMaLen shuttle 113247 alice 59572 robotron 372004 +.+
; movsw for litlen > 255 shuttle 113247 alice 59572 robotron 371612 ..+
; rep stosw for long runs shuttle 113247 alice 59572 robotron 371612 ...
; rep movsw for long cpys shuttle 113247 alice 59572 robotron 371035 ..+
; xchg/dec ah -> mov ah,val shuttle 112575 alice 59272 robotron 369198 +++
; force >12h len.to longcpy shuttle 101998 alice 59266 robotron 364459 +.+
; more efficient run branch shuttle 102239 alice 59297 robotron 364716 --- rb
; even more eff. run branch shuttle 101998 alice 59266 robotron 364459 ***
; BUGFIX - bad sign compare shuttle 101955 alice 59225 robotron 364117 +++
; reverse 16-bit len compar shuttle 102000 alice 59263 robotron 364460 --- rb
; jcxz for EOD detection no change to speed, but is 1 byte shorter +++
; force movsw for literals shuttle 107183 alice 62555 robotron 379524 --- rb
; defer shr4 until necessry shuttle 102069 alice 60236 robotron 364096 ---
; skip literals if LLL=0 shuttle 98655 alice 57849 robotron 363358 ---
; fall through to mid_liter shuttle 98595 alice 57789 robotron 361998 +++
; == jumptable experiments begin ==
; jumptable for small copys shuttle 101594 alice 61078 robotron 386018 ---
; start:xchg instead of mov shuttle 100948 alice 60467 robotron 381112 +++
; use table for LLL=0 check shuttle 106972 alice 63333 robotron 388304 --- rb
; jmptbl to fallthrough mov shuttle 102532 alice 60760 robotron 383070 ---
; cpy fallthrough check_ofs shuttle 98939 alice 58917 robotron 371019 +**
; single jumptable jump shuttle 97528 alice 57264 robotron 362194 ++*
; conditional check for L=7 shuttle 98610 alice 58521 robotron 368153 --- rb
; defer add MIN_MATCH_SIZE shuttle 97207 alice 57200 robotron 362884 ++*
; jumptable rewrite, no RLE shuttle 97744 alice 46905 robotron 309032 -++
; adc cx,0 -> adc cl,0 shuttle 97744 alice 46893 robotron 309032 .+.!
; jumptable rewrite w/RLE shuttle 88776 alice 50433 robotron 319222 +--
; short match copies movsb shuttle 97298 alice 49769 robotron 326282 ---rb
; long match copy #1 16-bit shuttle 92490 alice 46905 robotron 308722 +*+
; long match copy #2 extraB shuttle 92464 alice 46905 robotron 308371 +.+
; long match copy #3 0f->ed shuttle 86765 alice 46864 robotron 303895 +++!
; baseline new test harness shuttle 83925 alice 37948 robotron 269002 ***
; Pavel optimizations shuttle 82225 alice 36798 robotron 261226 +++
; OPTIMIZE_LONG_RLE 1 shuttle 82242 alice 36787 robotron 261392 **-
;
;------
;
;Pavel's optimization history:
; shuttle alice robotron time in 1.193 MHz timer clocks
;baseline 19109 D9A6 570F6
;adc cl,0->adc cl,cl 19035 D9A6 56FAB
;rep movsb->shr cx,1;jnc 18FD4 D998 56F14
;cmp bp,-2->inc bp;inc bp 18F07 D999 56EA3
;jz;jc->jc 18D81 D973 56B2F
;add al,3->movsb x3 18B1E D777 56197
;more lit_len_mat tables 18A83 D341 54ACC

View File

@ -0,0 +1,120 @@
; decompress_small_v1.asm - space-efficient decompressor implementation for x86
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 32
; ---------------------------------------------------------------------------
; Decompress raw LZSA1 block
; inputs:
; * esi: raw LZSA1 block
; * edi: output buffer
; output:
; * eax: decompressed size
; ---------------------------------------------------------------------------
%ifndef BIN
global lzsa1_decompress
global _lzsa1_decompress
%endif
; Decompress a raw (frame-less) LZSA1 block from esi to edi.
; All registers are preserved via pushad/popad; the decompressed size is
; returned in eax by patching the saved-eax slot before popad.
; NOTE(review): .done_decompressing measures the size against the stack
; argument at [esp+32+4], so the output buffer pointer is assumed to be
; passed on the stack as well (see the commented-out movs) -- confirm
; against the actual callers.
lzsa1_decompress:
_lzsa1_decompress:
pushad
;mov edi, [esp+32+4] ; edi = outbuf
;mov esi, [esp+32+8] ; esi = inbuf
xor ecx, ecx
.decode_token:
mul ecx ; ecx is always 0 here, so this zeroes eax (and edx) compactly
lodsb ; read token byte: O|LLL|MMMM
mov dl, al ; keep token in dl
and al, 070H ; isolate literals length in token (LLL)
shr al, 4 ; shift literals length into place
cmp al, 07H ; LITERALS_RUN_LEN?
jne .got_literals ; no, we have the full literals count from the token, go copy
lodsb ; grab extra length byte
add al, 07H ; add LITERALS_RUN_LEN
jnc .got_literals ; if no overflow, we have the full literals count, go copy
jne .mid_literals ; CF+NZ (byte was 250..255): count is 256 + next byte
lodsw ; CF+ZF (byte was 249): grab 16-bit extra length
jmp .got_literals
.mid_literals:
lodsb ; grab single extra length byte
inc ah ; add 256
.got_literals:
xchg ecx, eax ; ecx = literal count, eax = 0
rep movsb ; copy cx literals from ds:si to es:di
test dl, dl ; check match offset size in token (O bit)
js .get_long_offset ; O=1: absolute 16-bit match offset follows
dec ecx ; ecx: 0 -> -1 (0FFFFFFFFh)...
xchg eax, ecx ; ...moved into eax (ecx back to 0) so that after
lodsb ; loading the low byte, eax = 0FFFFFFxxH (sign-extended 1-byte offset)
jmp .get_match_length
.get_long_offset:
lodsw ; Get 2-byte match offset
.get_match_length:
xchg eax, edx ; edx: match offset eax: original token
and al, 0FH ; isolate match length in token (MMMM)
add al, 3 ; add MIN_MATCH_SIZE
cmp al, 012H ; MATCH_RUN_LEN?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
add al,012H ; add MIN_MATCH_SIZE + MATCH_RUN_LEN
jnc .got_matchlen ; if no overflow, we have the entire length
jne .mid_matchlen ; CF+NZ: length is 256 + next byte
lodsw ; grab 16-bit length
test eax, eax ; a 16-bit length of zero is the EOD marker
je .done_decompressing
jmp .got_matchlen
.mid_matchlen:
lodsb ; grab single extra length byte
inc ah ; add 256
.got_matchlen:
xchg ecx, eax ; copy match length into ecx
xchg esi, eax ; stash input pointer in eax
mov esi, edi ; esi now points at back reference in output data
movsx edx, dx ; sign-extend dx to 32-bits.
add esi, edx ; offset is negative: step back into the output buffer
rep movsb ; copy match (byte copy is overlap-safe for RLE-style matches)
xchg esi, eax ; restore esi
jmp .decode_token ; go decode another token
.done_decompressing:
sub edi, [esp+32+4] ; size = edi - outbuf (outbuf taken from stack arg)
mov [esp+28], edi ; patch saved eax so popad returns decompressed size
popad
ret ; done

View File

@ -0,0 +1,181 @@
; decompress_small_v2.asm - space-efficient decompressor implementation for x86
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 32
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * esi: raw LZSA2 block
; * edi: output buffer
; output:
; * eax: decompressed size
; ---------------------------------------------------------------------------
%ifndef BIN
global lzsa2_decompress
global _lzsa2_decompress
%endif
; Decompress a raw (frame-less) LZSA2 block from esi to edi.
; All registers are preserved via pushad/popad; the decompressed size is
; returned in eax by patching the saved-eax slot before popad.
; Register roles: dl = current token, ebx = nibble buffer/state (bh flag,
; bl pair of nibbles), ebp = last match offset (negative, for rep-match).
; NOTE(review): .done_decompressing measures the size against the stack
; argument at [esp+32+4], so the output buffer pointer is assumed to be
; passed on the stack as well (see the commented-out movs).
lzsa2_decompress:
_lzsa2_decompress:
pushad
;mov edi, [esp+32+4] ; edi = outbuf
;mov esi, [esp+32+8] ; esi = inbuf
xor ecx, ecx
xor ebx, ebx ; ebx = 0100H
inc bh ; bh = nibble-state toggle, bl = nibble pair buffer
xor ebp, ebp ; no previous match offset yet
.decode_token:
mul ecx ; ecx is always 0 here, so this zeroes eax (and edx) compactly
lodsb ; read token byte: XYZ|LL|MMMM
mov dl, al ; keep token in dl
and al, 018H ; isolate literals length in token (LL)
shr al, 3 ; shift literals length into place
cmp al, 03H ; LITERALS_RUN_LEN_V2?
jne .got_literals ; no, we have the full literals count from the token, go copy
call .get_nibble ; get extra literals length nibble
add al, cl ; add len from token to nibble
cmp al, 012H ; LITERALS_RUN_LEN_V2 + 15 ?
jne .got_literals ; if not, we have the full literals count, go copy
lodsb ; grab extra length byte
add al,012H ; overflow?
jnc .got_literals ; if not, we have the full literals count, go copy
lodsw ; grab 16-bit extra length
.got_literals:
xchg ecx, eax ; ecx = literal count, eax = 0
rep movsb ; copy ecx literals from esi to edi
test dl, 0C0h ; check match offset mode in token (X bit)
js .rep_match_or_large_offset
;;cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit)
; Y bit already known from the ZF of the 'test dl,0C0h' above
; (X is clear here, so ZF clear means bit 6 is set)
xchg ecx, eax ; clear ah - cx is zero from the rep movsb above
jne .offset_9_bit
; 5 bit offset
cmp dl, 020H ; CF = NOT bit 5 (Z); folded into the offset below
call .get_nibble_x
jmp .dec_offset_top
.offset_9_bit: ; 9 bit offset
lodsb ; get 8 bit offset from stream in A
dec ah ; set offset bits 15-8 to 1
test dl, 020H ; test bit Z (offset bit 8)
je .get_match_length
.dec_offset_top:
dec ah ; Z set: take another 256 off the (negative) offset
jmp .get_match_length
.rep_match_or_large_offset:
;;cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
jpe .rep_match_or_16_bit ; PF from 'test dl,0C0h': bit 7 is set here, so even parity means bit 6 is set too
; 13 bit offset
cmp dl, 0A0H ; test bit 5 (knowing that bit 7 is also set)
xchg ah, al ; build the high offset byte in al (eax = 0 here)
call .get_nibble_x
sub al, 2 ; subtract 512 from the offset's high byte
jmp .get_match_length_1
.rep_match_or_16_bit:
test dl, 020H ; test bit Z (offset bit 8)
jne .repeat_match ; rep-match: reuse previous offset in ebp
; 16 bit offset
lodsb ; Get high byte of the 2-byte match offset
.get_match_length_1:
xchg ah, al ; move high byte into ah
lodsb ; load match offset bits 0-7
.get_match_length:
xchg ebp, eax ; ebp: offset (kept for future rep-matches)
.repeat_match:
xchg eax, edx ; ax: original token
and al, 07H ; isolate match length in token (MMM)
add al, 2 ; add MIN_MATCH_SIZE_V2
cmp al, 09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne .got_matchlen ; no, we have the full match length from the token, go copy
call .get_nibble ; get extra match length nibble
add al, cl ; add len from token to nibble
cmp al, 018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
add al,018H ; overflow?
jnc .got_matchlen ; if not, we have the entire length
je .done_decompressing ; detect EOD code (CF+ZF)
lodsw ; grab 16-bit length
.got_matchlen:
xchg ecx, eax ; copy match length into ecx
xchg esi, eax ; stash input pointer in eax
movsx ebp, bp ; sign-extend bp to 32-bits
lea esi,[ebp+edi] ; esi now points at back reference in output data
rep movsb ; copy match (byte copy is overlap-safe)
xchg esi, eax ; restore esi
jmp .decode_token ; go decode another token
.done_decompressing:
sub edi, [esp+32+4] ; size = edi - outbuf (outbuf taken from stack arg)
mov [esp+28], edi ; patch saved eax so popad returns decompressed size
popad
ret ; done
; Build the high byte of a 5/13-bit offset from token bit 5 (in CF,
; inverted by the preceding cmp) plus one extra nibble from the stream.
.get_nibble_x:
cmc ; carry set if bit 4 was set
rcr al, 1 ; al = 0 on entry, so this puts that bit in bit 7
call .get_nibble ; get nibble for offset bits 0-3
or al, cl ; merge nibble
rol al, 1 ; al = nibble in bits 4-1, token bit in bit 0
xor al, 0E1H ; set offset bits 7-5 to 1
ret
; Return the next 4-bit nibble in cl. Nibbles are consumed two at a
; time from one stream byte; bh toggles 01H <-> 0FFH to track whether
; a buffered nibble is still available in bl.
.get_nibble:
neg bh ; nibble ready?
jns .has_nibble ; yes: the other half of bl is still pending
xchg ebx, eax
lodsb ; load two nibbles
xchg ebx, eax ; bl = nibble pair, eax preserved
.has_nibble:
mov cl, 4 ; swap 4 high and low bits of nibble
ror bl, cl
mov cl, 0FH
and cl, bl ; return current nibble in cl
ret

210
asm/z80/unlzsa1_fast.asm Normal file
View File

@ -0,0 +1,210 @@
;
; Speed-optimized LZSA1 decompressor by spke & uniabis (113 bytes)
;
; ver.00 by spke for LZSA 0.5.4 (03-24/04/2019, 134 bytes);
; ver.01 by spke for LZSA 0.5.6 (25/04/2019, 110(-24) bytes, +0.2% speed);
; ver.02 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.03 by uniabis (30/07/2019, 109(-1) bytes, +3.5% speed);
; ver.04 by spke (31/07/2019, small re-organization of macros);
; ver.05 by uniabis (22/08/2019, 107(-2) bytes, same speed);
; ver.06 by spke for LZSA 1.0.7 (27/08/2019, 111(+4) bytes, +2.1% speed);
; ver.07 by spke for LZSA 1.1.0 (25/09/2019, added full revision history);
; ver.08 by spke for LZSA 1.1.2 (22/10/2019, re-organized macros and added an option for unrolled copying of long matches);
; ver.09 by spke for LZSA 1.2.1 (02/01/2020, 109(-2) bytes, same speed);
; ver.10 by spke (07/04/2021, 113(+4) bytes, +5% speed)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+51 byte)
; DEFINE BACKWARD_DECOMPRESS ; uncomment to decompress backward compressed data (-3% speed, +5 bytes)
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
; HL = DE+HL
add hl,de
ENDM
MACRO COPY1
ldi
ENDM
MACRO COPYBC
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
; HL = DE-HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
MACRO COPY1
ldd
ENDM
MACRO COPYBC
lddr
ENDM
ENDIF
; Speed-optimized LZSA1 decompression loop.
; HL = source (compressed), DE = destination, B kept at 0 so that BC can
; be used as a copy counter by loading only C. The source pointer is
; parked on the stack while HL temporarily holds the (negative) match
; offset; 'exa' (ex af,af') parks the token while literal lengths are
; decoded. NEXT_HL/ADD_OFFSET/COPY1/COPYBC abstract forward vs backward
; decompression (see the macro definitions above).
@DecompressLZSA1:
ld b,0 : jr ReadToken
IFNDEF UNROLL_LONG_MATCHES
; entry for extended match lengths that still fit in one byte
CopyMatch2: ld c,a
.UseC ex (sp),hl : jr CopyMatch.UseC ; swap src back onto stack, HL = offset
ENDIF
; LLL=0: A is 0 from 'and #70', so xor re-reads the token unmodified
NoLiterals: xor (hl) : NEXT_HL : jp m,LongOffset
ShortOffset: push hl : ld l,(hl) : ld h,#FF ; save src; HL = sign-extended 1-byte offset
; short matches have length 0+3..14+3
add 3 : cp 15+3 : jr nc,LongerMatch
; placed here this saves a JP per iteration
CopyMatch: ld c,a ; BC = len, DE = dest, HL = offset, SP -> [src]
.UseC ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY1 : COPY1 : COPYBC ; BC = 0, DE = dest
.popSrc pop hl : NEXT_HL ; HL = src
ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ld a,(hl) : and #70 : jr z,NoLiterals
cp #70 : jr z,MoreLiterals ; LLL=7 means 7+ literals...
rrca : rrca : rrca : rrca : ld c,a ; LLL<7 means 0..6 literals...
ld a,(hl) : NEXT_HL ; re-read token, advance past it
COPYBC ; copy the C literals from the stream
; the top bit of token is set if the offset contains two bytes
and #8F : jp p,ShortOffset ; A = O|0000|MMMM
LongOffset: ; read second byte of the offset
ld c,(hl) : NEXT_HL : push hl : ld h,(hl) : ld l,c ; save src; HL = 16-bit offset
add -128+3 : cp 15+3 : jp c,CopyMatch ; strip the O bit (A=#80+MMMM) and add MIN_MATCH_SIZE
IFNDEF UNROLL_LONG_MATCHES
; MMMM=15 indicates a multi-byte number of literals
LongerMatch: ex (sp),hl : NEXT_HL : add (hl) : jr nc,CopyMatch2 ; HL = src again; add the extra length byte
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch2.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,CopyMatch2.UseC
pop bc : ret ; drop the saved offset and return
ELSE
; MMMM=15 indicates a multi-byte number of literals
LongerMatch: ex (sp),hl : NEXT_HL : add (hl) : jr c,VeryLongMatch
ld c,a
.UseC ex (sp),hl
ADD_OFFSET
COPY1 : COPY1
; this is an unrolled equivalent of LDIR
xor a : sub c ; compute the entry point into the COPY1 run
and 16-1 : add a ; (self-modifying jr below)
ld (.jrOffset),a : jr nz,$+2
.jrOffset EQU $-1
.fastLDIR DUP 16
COPY1
EDUP
jp pe,.fastLDIR ; P/V stays set while BC != 0
jr CopyMatch.popSrc
VeryLongMatch: ; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,LongerMatch.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,LongerMatch.UseC
pop bc : ret ; drop the saved offset and return
ENDIF
MoreLiterals: ; there are three possible situations here
; A = #70, so xor clears LLL: A' = O|000|MMMM saved for after the literals
xor (hl) : NEXT_HL : exa
ld a,7 : add (hl) : jr c,ManyLiterals ; literal count = 7 + extra byte, carry = 249+
CopyLiterals: ld c,a
.UseC NEXT_HL : COPYBC ; skip the length byte(s), copy the literals
exa : jp p,ShortOffset : jr LongOffset ; restore token, dispatch on the O bit
ManyLiterals:
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC ; NZ: 16-bit count in BC (1 extra byte)
.code0 NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC ; Z: read a second extra byte into B

View File

@ -1,20 +1,39 @@
;
; Size-optimized LZSA decompressor by spke (v.1 23/04/2019, 69 bytes)
; Size-optimized LZSA1 decompressor by spke & uniabis (67 bytes)
;
; ver.00 by spke for LZSA 0.5.4 (23/04/2019, 69 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by uniabis (30/07/2019, 68(-1) bytes, +3.2% speed);
; ver.03 by spke for LZSA 1.0.7 (31/07/2019, small re-organization of macros);
; ver.04 by spke (06/08/2019, 67(-1) bytes, -1.2% speed);
; ver.05 by spke for LZSA 1.1.0 (25/09/2019, added full revision history)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -r <sourcefile> <outfile>
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,CompressedData
; ld de,WhereToDecompress
; call DecompressLZSA
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; Of course, LZSA compression algorithm is (c) 2019 Emmanuel Marty,
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
@ -34,49 +53,80 @@
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
@DecompressLZSA:
; DEFINE BACKWARD_DECOMPRESS
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
@DecompressLZSA1:
ld b,0
; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ReadToken: ld a,(hl) : exa : ld a,(hl) : inc hl
ReadToken: ld a,(hl) : NEXT_HL : push af
and #70 : jr z,NoLiterals
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
cp #07 : call z,ReadLongBA ; LLL=7 means 7+ literals...
ld c,a : ldir
ld c,a : BLOCKCOPY
; next we read the low byte of the -offset
NoLiterals: push de : ld e,(hl) : inc hl : ld d,#FF
NoLiterals: pop af : push de : ld e,(hl) : NEXT_HL : ld d,#FF
; the top bit of token is set if
; the offset contains the high byte as well
exa : or a : jp p,ShortOffset
or a : jp p,ShortOffset
LongOffset: ld d,(hl) : inc hl
LongOffset: ld d,(hl) : NEXT_HL
; last but not least, the match length is read
ShortOffset: and #0F : add 3 ; MMMM<15 means match lengths 0+3..14+3
cp 15+3 : call z,ReadLongBA ; MMMM=15 means lengths 14+3+
ld c,a
ex (sp),hl : push hl ; BC = len, DE = -offset, HL = dest, SP ->[dest,src]
add hl,de : pop de ; BC = len, DE = dest, HL = dest+(-offset), SP->[src]
ldir : pop hl ; BC = 0, DE = dest, HL = src
jr ReadToken
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl : jr ReadToken ; HL = src
; a standard routine to read extended codes
; into registers B (higher byte) and A (lower byte).
ReadLongBA: add (hl) : inc hl : ret nc
ReadLongBA: add (hl) : NEXT_HL : ret nc
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1: ld b,a : ld a,(hl) : inc hl : ret nz
.code0: ld c,a : ld b,(hl) : inc hl
.code1: ld b,a : ld a,(hl) : NEXT_HL : ret nz
.code0: ld c,a : ld b,(hl) : NEXT_HL
; the two-byte match length equal to zero
; designates the end-of-data marker

265
asm/z80/unlzsa2_fast.asm Normal file
View File

@ -0,0 +1,265 @@
;
; Speed-optimized LZSA2 decompressor by spke & uniabis (210 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-07/06/2019, 218 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by spke for LZSA 1.0.6 (27/07/2019, fixed a bug in the backward decompressor);
; ver.03 by uniabis (30/07/2019, 213(-5) bytes, +3.8% speed and support for Hitachi HD64180);
; ver.04 by spke for LZSA 1.0.7 (01/08/2019, 214(+1) bytes, +0.2% speed and small re-organization of macros);
; ver.05 by spke (27/08/2019, 216(+2) bytes, +1.1% speed);
; ver.06 by spke for LZSA 1.1.0 (26/09/2019, added full revision history);
; ver.07 by spke for LZSA 1.1.1 (10/10/2019, +0.2% speed and an option for unrolled copying of long matches);
; ver.08 by spke (07-08/04/2022, 210(-6) bytes, +1.7% speed, using self-modifying code by default)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f2 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE UNROLL_LONG_MATCHES ; uncomment for faster decompression of very compressible data (+38 bytes)
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b (+5 bytes, -3.2% speed)
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
add hl,de
ENDM
MACRO COPY1
ldi
ENDM
MACRO COPYBC
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
; HL = DE - HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
MACRO COPY1
ldd
ENDM
MACRO COPYBC
lddr
ENDM
ENDIF
; Speed-optimized LZSA2 decompressor entry point.
; In:  HL = compressed data, DE = destination buffer.
; Register conventions used throughout:
;   A' holds the spare nibble (as %1111....) of the nibble store;
;   the carry flag saved with it (via scf/exa) marks the store as empty.
@DecompressLZSA2:
; A' stores next nibble as %1111.... or assumed to contain trash
; B is assumed to be 0 in many places
ld b,0 : scf : exa : jr ReadToken
; literal run of 18+ bytes: add the extra length byte; on overflow,
; a full 16-bit literal count follows in the stream
ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyMoreLiterals
ld c,(hl) : NEXT_HL
ld a,b : ld b,(hl)
jr ReadToken.NEXTHLuseBC
; literal run needs a nibble-encoded extension (token LL field was 3)
MoreLiterals: ld b,(hl) : NEXT_HL
scf : exa : jr nc,.noUpdate
; nibbles are read left-to-right; spare nibbles are kept in AF'
; and flag NC indicates that a nibble is available
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate or #F0
;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
inc a : jr z,ManyLiterals : sub #F0-3+1
; copy C literals; the first two are copied unrolled for speed
CopyMoreLiterals: ld c,a : ld a,b : ld b,0
COPY1
COPY1
COPYBC
or a : jp p,CASE0xx
cp %11000000 : jr c,CASE10x
; "111": repeated offset
CASE11x cp %11100000 : jr nc,MatchLen
; "110": 16-bit offset
CASE110: ld b,(hl) : NEXT_HL : jr ReadOffsetC
Literals0011: jr nz,MoreLiterals
; if "LL" of the byte token is equal to 0,
; there are no literals to copy
NoLiterals: or (hl) : NEXT_HL
jp m,CASE1xx
; short (5 or 9 bit long) offsets
CASE0xx cp %01000000 : jr c,CASE00x
; "01x": the case of the 9-bit offset
CASE01x: dec b : cp %01100000 : rl b
; low offset byte comes from the stream; offset is then remembered
; by patching the ld hl,nn operand at CopyMatch.PrevOffset
ReadOffsetC ld c,(hl) : NEXT_HL
SaveOffset ld (CopyMatch.PrevOffset),bc : ld b,0
; decode the 3-bit match length field; MMM=7 means an extended length follows
MatchLen inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC push hl
.PrevOffset EQU $+1 : ld hl,0 ; self-modified: holds the last offset
ADD_OFFSET
COPY1
COPYBC
.popSrc pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 00 and 11 separately
rrca : rrca : rrca
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
COPYBC
; the token and literals are followed by the offset
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 13-bit offset
CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate or #F0 : ld b,a : ld a,c
cp %10100000 : dec b : rl b : jr ReadOffsetC
; "00x": the case of the 5-bit offset
CASE00x: ld b,a : exa : jr nc,.noUpdate
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate or #F0 : ld c,a : ld a,b
cp %00100000 : rl c
ld b,#FF : jr SaveOffset
; extended match length: read a nibble; value 15 means more bytes follow
LongerMatch: scf : exa : jr nc,.noUpdate
ld a,(hl) : or a : exa
ld a,(hl) : NEXT_HL
rrca : rrca : rrca : rrca
.noUpdate or #F0 : sub #F0-9 : cp 15+9 : jr c,CopyMatch
IFNDEF UNROLL_LONG_MATCHES
; long match: add the extra byte; overflow means a 16-bit length follows,
; and a 16-bit length of zero is the end-of-data marker (plain RET)
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
ret
ELSE
LongMatch: add (hl) : NEXT_HL : jr c,VeryLongMatch
ld c,a
.useC push hl
ld hl,(CopyMatch.PrevOffset)
ADD_OFFSET
; this is an unrolled equivalent of LDIR
; (the jr below is patched to enter the unrolled run mid-way
; so that len mod 8 bytes are copied on the first pass)
xor a : sub c
and 8-1 : add a
ld (.jrOffset),a : jr nz,$+2
.jrOffset EQU $-1
.fastLDIR DUP 8
COPY1
EDUP
jp pe,.fastLDIR
jp CopyMatch.popSrc
VeryLongMatch: ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,LongMatch.useC
ret
ENDIF

View File

@ -1,167 +0,0 @@
;
; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019, 218 bytes)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,CompressedData
; ld de,WhereToDecompress
; call DecompressLZSA2
;
; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; Superseded speed-optimized LZSA2 decompressor (v.1, removed by this commit).
; In:  HL = compressed data, DE = destination buffer.
; Differences from its replacement: repeat offset is kept in IX instead of
; self-modifying code, and the nibble-store-empty state is signalled by the
; sign flag of A' (jp m) rather than the carry flag.
@DecompressLZSA2:
; A' stores next nibble as %1111.... or assumed to contain trash
; B is assumed to be 0
xor a : ld b,a : exa : jr ReadToken
; extended match length: read a nibble and rebase; 15+9 means more bytes follow
LongerMatch: exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : inc hl : or #0F
rrca : rrca : rrca : rrca
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
;inc a : jr z,LongMatch : sub #F0-9+1 : jp CopyMatch
LongMatch: ;ld a,24 :
add (hl) : inc hl : jr nc,CopyMatch
ld c,(hl) : inc hl
ld b,(hl) : inc hl
jr nz,CopyMatch.useC
; 16-bit match length of zero is the end-of-data marker
pop de : ret
; literal run of 18+ bytes: extra byte, then possibly a 16-bit count
ManyLiterals: ld a,18 :
add (hl) : inc hl : jr nc,CopyLiterals
ld c,(hl) : inc hl
ld a,b : ld b,(hl) : inc hl
jr CopyLiterals.useBC
; literal run needs a nibble-encoded extension (token LL field was 3)
MoreLiterals: ld b,(hl) : inc hl
exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : inc hl : or #0F
rrca : rrca : rrca : rrca
.noUpdate ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a
.useC ld a,b : ld b,0
.useBC ldir
push de : or a : jp p,CASE0xx : jr CASE1xx
; if "LL" of the byte token is equal to 0,
; there are no literals to copy
NoLiterals: xor (hl) : inc hl
push de : jp m,CASE1xx
; short (5 or 9 bit long) offsets
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
; "01x": the case of the 9-bit offset
CASE01x: cp %01100000 : rl d
ReadOffsetE: ld e,(hl) : inc hl
; remember the offset in IX for the "111" repeat-offset case
SaveOffset: ld ixl,e : ld ixh,d
MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
add hl,de : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
ldir : pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals
jp pe,MoreLiterals ; 00 has already been processed; this identifies the case of 11
rrca : rrca : rrca
ld c,a : ld a,(hl) : inc hl ; token is re-read for further processing
ldir
; the token and literals are followed by the offset
push de : or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 13-bit offset
CASE10x: ld c,a : xor a
exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : inc hl : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld d,a : ld a,c
cp %10100000 : rl d
dec d : dec d : jr ReadOffsetE ; dec d twice = subtract 512 from the offset
; "00x": the case of the 5-bit offset
CASE00x: ld c,a : xor a
exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : inc hl : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jp SaveOffset
; two remaining cases
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: ld e,ixl : ld d,ixh : jr MatchLen
; "110": 16-bit offset
CASE110: ld d,(hl) : inc hl : jr ReadOffsetE
;ReadNibble: ; 17 bytes, 44 t-state per nibble
; exa : ret m ; 4+11 = 15t
;UpdateNibble:
; ld a,(hl) : or #F0 : exa
; ld a,(hl) : inc hl : or #0F
; rrca : rrca : rrca : rrca : ret ; 4+5 + 7+7+4+7+6+7+4+4+4+4+10 = 73t

197
asm/z80/unlzsa2_small.asm Normal file
View File

@ -0,0 +1,197 @@
;
; Size-optimized LZSA2 decompressor by spke & uniabis (134 bytes)
;
; ver.00 by spke for LZSA 1.0.0 (02-09/06/2019, 145 bytes);
; ver.01 by spke for LZSA 1.0.5 (24/07/2019, added support for backward decompression);
; ver.02 by uniabis (30/07/2019, 144(-1) bytes, +3.3% speed and support for Hitachi HD64180);
; ver.03 by spke for LZSA 1.0.7 (01/08/2019, 140(-4) bytes, -1.4% speed and small re-organization of macros);
; ver.04 by spke for LZSA 1.1.0 (26/09/2019, removed usage of IY, added full revision history)
; ver.05 by spke for LZSA 1.1.1 (11/10/2019, 139(-1) bytes, +0.1% speed)
; ver.06 by spke (11-12/04/2021, added some comments)
; ver.07 by spke (04-05/04/2022, 134(-5) bytes, +1% speed, using self-modifying code by default)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f2 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA2
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
;
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b (+5 bytes, -3% speed)
; DEFINE AVOID_SELFMODIFYING_CODE ; uncomment to disallow self-modifying code (-1 byte, -4% speed)
; Direction-abstraction macros for the size-optimized decompressor:
; one body assembles for forward or backward decompression depending on
; the BACKWARD_DECOMPRESS define.
IFNDEF BACKWARD_DECOMPRESS
; advance the stream pointer to the next byte (forward direction)
MACRO NEXT_HL
inc hl
ENDM
; HL = match source: HL holds the stored offset, DE the destination
MACRO ADD_OFFSET
add hl,de
ENDM
; copy BC bytes (HL)->(DE), forward
MACRO BLOCKCOPY
ldir
ENDM
ELSE
; step the stream pointer to the previous byte (backward direction)
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
;push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
; HL = DE - HL
ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 6*4 = 24t / 6 bytes
ENDM
; copy BC bytes (HL)->(DE), moving backwards
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
; Size-optimized LZSA2 decompressor entry point.
; In:  HL = compressed data, DE = destination buffer.
; The previous match offset is kept either in the self-modified operand of
; "ld hl,0" at .PrevOffset or (with AVOID_SELFMODIFYING_CODE) in IX.
@DecompressLZSA2:
; in many places we assume that B = 0
; flag P in A' signals the need to re-load the nibble store
xor a : ld b,a : exa : jr .ReadToken
.CASE00x: ; token "00Z" stands for 5-bit offsets
; (read a nibble for offset bits 1-4 and use the inverted bit Z
; of the token as bit 0 of the offset; set bits 5-15 of the offset to 1)
push af
call ReadNibble.skipLDCA : ld c,a
pop af
cp %00100000 : rl c : jr .SaveOffset
.CASE0xx dec b : cp %01000000 : jr c,.CASE00x
.CASE01x: ; token "01Z" stands for 9-bit offsets
; (read a byte for offset bits 0-7 and use the inverted bit Z
; for bit 8 of the offset; set bits 9-15 of the offset to 1)
cp %01100000
.doRLB rl b
.OffsetReadC: ld c,(hl) : NEXT_HL
IFNDEF AVOID_SELFMODIFYING_CODE
.SaveOffset: ld (.PrevOffset),bc : ld b,0
ELSE
.SaveOffset: push bc : pop ix : ld b,0
ENDIF
; match length = MMM + 2; value 9 means an extended length follows
.MatchLen: and %00000111 : add 2 : cp 9
call z,ExtendedCode
.CopyMatch: ld c,a
push hl ; BC = len, DE = dest, HL = -offset, SP -> [src]
IFNDEF AVOID_SELFMODIFYING_CODE
.PrevOffset EQU $+1 : ld hl,0 ; self-modified: holds the last offset
ELSE
push ix : pop hl
ENDIF
ADD_OFFSET
BLOCKCOPY ; BC = 0, DE = dest
pop hl ; HL = src
; each record begins with the byte token "XYZ|LL|MMM"
.ReadToken: ld a,(hl) : NEXT_HL : push af
and %00011000 : jr z,.NoLiterals
rrca : rrca : rrca
call pe,ExtendedCode ; LL=3 means an extended literal count follows
ld c,a
BLOCKCOPY
.NoLiterals: pop af : or a : jp p,.CASE0xx
.CASE1xx cp %11000000 : jr c,.CASE10x
; token "111" stands for repeat offsets
; (reuse the offset value of the previous match command)
cp %11100000 : jr nc,.MatchLen
.CASE110: ; token "110" stands for 16-bit offset
; (read a byte for offset bits 8-15, then another byte for offset bits 0-7)
ld b,(hl) : NEXT_HL : jr .OffsetReadC
.CASE10x: ; token "10Z" stands for 13-bit offsets
; (read a nibble for offset bits 9-12 and use the inverted bit Z
; for bit 8 of the offset, then read a byte for offset bits 0-7.
; set bits 13-15 of the offset to 1. subtract 512 from the offset to get the final value)
call ReadNibble : ld b,a
ld a,c : cp %10100000
dec b : jr .doRLB
; shared extended-length decoder for literal and match lengths:
; nibble value 15 means a further byte (and possibly a 16-bit value) follows
ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte
sub #F0+1 : add c : ret
ExtraByte ld a,15 : add c : add (hl) : NEXT_HL : ret nc
ld a,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : ret nz
; a 16-bit length of zero marks end of data: drop the return address
pop bc ; RET is not needed, because RET from ReadNibble is sufficient
ReadNibble: ld c,a
.skipLDCA xor a : exa : ret m
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca : ret

View File

@ -1,96 +0,0 @@
;
; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019, 145 bytes)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f2 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,CompressedData
; ld de,WhereToDecompress
; call DecompressLZSA2
;
; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; Superseded size-optimized LZSA2 decompressor (v.1, removed by this commit).
; In:  HL = compressed data, DE = destination buffer.
; Uses IY for the repeat offset and IXL to hold the current token, instead
; of the self-modifying-code scheme of its replacement.
@DecompressLZSA2:
xor a : ld b,a : exa : jr ReadToken
; short (5 or 9 bit) offsets; D pre-set to #FF (high bits of offset are 1)
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
CASE01x: cp %01100000 : rl d
OffsetReadE: ld e,(hl) : inc hl
; remember the offset in IY for the "111" repeat-offset case
SaveOffset: ld iyl,e : ld iyh,d
; match length = MMM + 2; value 9 means an extended length follows
MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode
CopyMatch: ld c,a
ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
add hl,de : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
ldir : pop hl
; each record begins with the byte token "XYZ|LL|MMM"; token kept in IXL
ReadToken: ld a,(hl) : ld ixl,a : inc hl
and %00011000 : jr z,NoLiterals
rrca : rrca : rrca
call pe,ExtendedCode ; LL=3 means an extended literal count follows
ld c,a
ldir
NoLiterals: push de : ld a,ixl
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": 13-bit offset (nibble + byte; dec d twice subtracts 512)
CASE10x: call ReadNibble
ld d,a : ld a,c
cp %10100000 : rl d
dec d : dec d : jr OffsetReadE
CASE00x: call ReadNibble
ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: ld e,iyl : ld d,iyh : jr MatchLen
; "110": 16-bit offset
CASE110: ld d,(hl) : inc hl : jr OffsetReadE
; shared extended-length decoder; nibble value 15 means more bytes follow
ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte
sub #F0+1 : add c : ret
ExtraByte ld a,15 : add c : add (hl) : inc hl : ret nc
ld a,(hl) : inc hl
ld b,(hl) : inc hl : ret nz
; a 16-bit length of zero marks end of data: unwind and return
pop de : pop de : ret
ReadNibble: ld c,a : xor a : exa : ret m
UpdateNibble ld a,(hl) : or #F0 : exa
ld a,(hl) : inc hl : or #0F
rrca : rrca : rrca : rrca : ret

View File

@ -1,101 +0,0 @@
;
; Speed-optimized LZSA decompressor by spke (v.1 03-25/04/2019, 110 bytes)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,CompressedData
; ld de,WhereToDecompress
; call DecompressLZSA
;
; Of course, LZSA compression algorithm is (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; Speed-optimized LZSA1 decompressor (removed by this commit).
; In:  HL = compressed data, DE = destination buffer.
; LZSA1 records use the byte token "O|LLL|MMMM": O selects 1- or 2-byte
; offsets, LLL is the literal count, MMMM the match length.
@DecompressLZSA:
ld b,0 : jr ReadToken
NoLiterals: xor (hl) : inc hl
push de : ld e,(hl) : inc hl : jp m,LongOffset
; short matches have length 0+3..14+3
ShortOffset: ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
; placed here this saves a JP per iteration
CopyMatch: ld c,a
.UseC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
add hl,de : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
ldir : pop hl ; BC = 0, DE = dest, HL = src
ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ld a,(hl) : and #70 : jr z,NoLiterals
cp #70 : jr z,MoreLiterals ; LLL=7 means 7+ literals...
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
ld c,a : ld a,(hl) : inc hl
ldir
; next we read the first byte of the offset
push de : ld e,(hl) : inc hl
; the top bit of token is set if the offset contains two bytes
and #8F : jp p,ShortOffset
LongOffset: ; read second byte of the offset
ld d,(hl) : inc hl
add -128+3 : cp 15+3 : jp c,CopyMatch
; MMMM=15 indicates a multi-byte number of literals
LongerMatch: add (hl) : inc hl : jr nc,CopyMatch
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : ld c,(hl) : inc hl : jr nz,CopyMatch.UseC
.code0 ld b,(hl) : inc hl
; the two-byte match length equal to zero
; designates the end-of-data marker
ld a,b : or c : jr nz,CopyMatch.UseC
pop de : ret
MoreLiterals: ; there are three possible situations here
xor (hl) : inc hl : exa
ld a,7 : add (hl) : inc hl : jr c,ManyLiterals
CopyLiterals: ld c,a
.UseC ldir
push de : ld e,(hl) : inc hl
exa : jp p,ShortOffset : jr LongOffset
ManyLiterals:
.code1 ld b,a : ld c,(hl) : inc hl : jr nz,CopyLiterals.UseC
.code0 ld b,(hl) : inc hl : jr CopyLiterals.UseC

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 37 KiB

View File

@ -30,8 +30,10 @@
*
*/
#define _POSIX_C_SOURCE 200808
#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include "format.h"
#include "lib.h"
@ -96,6 +98,6 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
void lzsa_dictionary_free(void **ppDictionaryData) {
if (*ppDictionaryData) {
free(*ppDictionaryData);
ppDictionaryData = NULL;
*ppDictionaryData = NULL;
}
}

View File

@ -1,5 +1,5 @@
/*
* expand_v1.c - LZSA1 block decompressor implementation
* expand_block_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -120,7 +120,7 @@ static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppI
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
@ -166,7 +166,7 @@ int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockS
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 18) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 2);
@ -177,29 +177,36 @@ int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockS
if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
return -1;
if (nMatchLen == 0)
break;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
return -1;
}
}
else {

View File

@ -1,5 +1,5 @@
/*
* expand_v1.h - LZSA1 block decompressor definitions
* expand_block_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -30,8 +30,8 @@
*
*/
#ifndef _EXPAND_V1_H
#define _EXPAND_V1_H
#ifndef _EXPAND_BLOCK_V1_H
#define _EXPAND_BLOCK_V1_H
/**
* Decompress one LZSA1 data block
@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize);
#endif /* _EXPAND_V1_H */
#endif /* _EXPAND_BLOCK_V1_H */

View File

@ -1,5 +1,5 @@
/*
* expand_v2.c - LZSA2 block decompressor implementation
* expand_block_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -80,6 +80,9 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock
return -1;
}
}
else if ((*nLength) == 256) {
(*nLength) = 0;
}
}
else {
return -1;
@ -106,7 +109,7 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
@ -143,8 +146,8 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
}
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned char nOffsetMode = token & 0xc0;
if (pInBlock < pInBlockEnd) { /* The last token in the block does not include match information */
const unsigned char nOffsetMode = token & 0xc0;
unsigned int nValue;
switch (nOffsetMode) {
@ -182,6 +185,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
if ((token & 0x20) == 0) {
/* 16 bit offset */
nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
if (pInBlock >= pInBlockEnd) return -1;
nMatchOffset |= (unsigned int)(*pInBlock++);
nMatchOffset ^= 0xffff;
nMatchOffset++;
@ -192,7 +196,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x07);
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 10) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 2);
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
@ -202,29 +206,36 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
return -1;
if (nMatchLen == 0)
break;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
return -1;
}
}
else {

View File

@ -1,5 +1,5 @@
/*
* expand_v2.h - LZSA2 block decompressor definitions
* expand_block_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -30,8 +30,8 @@
*
*/
#ifndef _EXPAND_V2_H
#define _EXPAND_V2_H
#ifndef _EXPAND_BLOCK_V2_H
#define _EXPAND_BLOCK_V2_H
/**
* Decompress one LZSA2 data block
@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize);
#endif /* _EXPAND_V2_H */
#endif /* _EXPAND_BLOCK_V2_H */

View File

@ -35,6 +35,7 @@
#include "expand_context.h"
#include "expand_block_v1.h"
#include "expand_block_v2.h"
#include "lib.h"
/**
* Decompress one data block
@ -45,14 +46,31 @@
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags (LZSA_FLAG_xxx)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion) {
int lzsa_decompressor_expand_block(unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize, const int nFormatVersion, const int nFlags) {
int nDecompressedSize;
if (nFlags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInBlock, nBlockSize);
}
if (nFormatVersion == 1)
return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
nDecompressedSize = lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else if (nFormatVersion == 2)
return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
nDecompressedSize = lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else
return -1;
nDecompressedSize = -1;
if (nDecompressedSize != -1 && (nFlags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData + nOutDataOffset, nDecompressedSize);
}
if (nFlags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInBlock, nBlockSize);
}
return nDecompressedSize;
}

View File

@ -48,10 +48,11 @@ extern "C" {
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags (LZSA_FLAG_xxx)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion);
int lzsa_decompressor_expand_block(unsigned char *pInBlock, const int nBlockSize, unsigned char *pOutData, const int nOutDataOffset, const int nBlockMaxSize, const int nFormatVersion, const int nFlags);
#ifdef __cplusplus
}

View File

@ -98,8 +98,8 @@ size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size
*
* @return actual decompressed size, or -1 for error
*/
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion) {
const unsigned char *pCurFileData = pFileData;
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion) {
unsigned char *pCurFileData = pFileData;
const unsigned char *pEndFileData = pCurFileData + nFileSize;
unsigned char *pCurOutBuffer = pOutBuffer;
const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
@ -107,8 +107,7 @@ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOut
const int nHeaderSize = lzsa_get_header_size();
if (nFlags & LZSA_FLAG_RAW_BLOCK) {
int nEODBytes = (*pFormatVersion == 2) ? 2 : 4;
return (size_t)lzsa_decompressor_expand_block(pFileData, (int)nFileSize - nEODBytes /* EOD marker */, pOutBuffer, 0, (int)nMaxOutBufferSize, *pFormatVersion);
return (size_t)lzsa_decompressor_expand_block(pFileData, (int)nFileSize, pOutBuffer, 0, (int)nMaxOutBufferSize, *pFormatVersion, nFlags);
}
/* Check header */
@ -140,7 +139,7 @@ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOut
if ((pCurFileData + nBlockDataSize) > pEndFileData)
return -1;
nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), *pFormatVersion);
nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), *pFormatVersion, nFlags);
if (nDecompressedSize < 0)
return -1;

View File

@ -61,7 +61,7 @@ size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size
*
* @return actual decompressed size, or -1 for error
*/
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion);
size_t lzsa_decompress_inmem(unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, const unsigned int nFlags, int *pFormatVersion);
#ifdef __cplusplus
}

View File

@ -185,11 +185,6 @@ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pO
}
size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
if (nFlags & LZSA_FLAG_RAW_BLOCK) {
size_t nEODBytes = (nFormatVersion == 2) ? 2 : 4;
if (nReadBytes > nEODBytes)
nReadBytes -= nEODBytes;
else
nReadBytes = 0;
nBlockSize = (unsigned int)nReadBytes;
}
@ -201,7 +196,7 @@ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pO
nDecompressedSize = nBlockSize;
}
else {
nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion);
nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion, nFlags);
if (nDecompressedSize < 0) {
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
break;

View File

@ -39,9 +39,6 @@
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**

View File

@ -60,10 +60,11 @@ int lzsa_get_frame_size(void) {
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion) {
if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1;
@ -146,6 +147,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to returned format version, if successful
*
* @return 0 for success, or -1 for failure
*/

View File

@ -56,10 +56,11 @@ int lzsa_get_frame_size(void);
*
* @param pFrameData encoding buffer
* @param nMaxFrameDataSize max encoding buffer size, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion);
/**
* Encode compressed block frame header
@ -98,6 +99,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @param pFrameData data bytes
* @param nFrameDataSize number of bytes to decode
* @param nFormatVersion pointer to returned format version, if successful
*
* @return 0 for success, or -1 for failure
*/

View File

@ -1,138 +0,0 @@
/*
* hashmap.c - integer hashmap implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "hashmap.h"
/**
 * Generate key hash by mixing
 *
 * Spreads the entropy of a 64-bit key through a sequence of shift/add/xor
 * rounds, then masks the result down to a bucket index.
 *
 * @param key key to get hash for
 *
 * @return hash (bucket index in [0, LZSA_HASH_NBUCKETS - 1])
 */
static unsigned int lzsa_hashmap_get_hash(unsigned long long key) {
   unsigned long long h = key;

   h = (~h) + (h << 21);
   h ^= h >> 24;
   h = (h + (h << 3)) + (h << 8);
   h ^= h >> 14;
   h = (h + (h << 2)) + (h << 4);
   h ^= h >> 28;
   h += h << 31;

   /* LZSA_HASH_NBUCKETS is a power of two, so the mask keeps only the
    * low-order bucket bits */
   return (unsigned int)(h & (LZSA_HASH_NBUCKETS - 1));
}
/**
 * Initialize hashmap
 *
 * Leaves the map empty: no entry buffers allocated, every bucket chain NULL.
 *
 * @param pHashMap hashmap
 */
void lzsa_hashmap_init(lzsa_hashmap_t *pHashMap) {
   int i;

   pHashMap->pBuffer = NULL;
   for (i = 0; i < LZSA_HASH_NBUCKETS; i++)
      pHashMap->pBucket[i] = NULL;
}
/**
 * Set value for key
 *
 * Overwrites the value if the key is already present in its bucket chain,
 * otherwise appends a new entry taken from a pool buffer (a new pool of 255
 * entries is allocated whenever the current one is exhausted).
 *
 * NOTE(review): if the pool allocation fails, the insertion is silently
 * dropped — the caller receives no error indication.
 *
 * @param pHashMap hashmap
 * @param key key to set value for
 * @param value new value
 */
void lzsa_hashmap_insert(lzsa_hashmap_t *pHashMap, unsigned long long key, unsigned int value) {
   unsigned int hash = lzsa_hashmap_get_hash(key);
   lzsa_hashvalue_t **pBucket = &pHashMap->pBucket[hash];

   /* Walk the bucket chain; if the key already exists, just replace its value */
   while (*pBucket) {
      if ((*pBucket)->key == key) {
         (*pBucket)->value = value;
         return;
      }

      pBucket = &((*pBucket)->pNext);
   }

   /* Key not found. Make sure a pool buffer with a free slot exists */
   if (!pHashMap->pBuffer || pHashMap->pBuffer->nFreeEntryIdx >= 255) {
      lzsa_hashbuffer_t *pNewBuffer = (lzsa_hashbuffer_t *)malloc(sizeof(lzsa_hashbuffer_t));
      if (!pNewBuffer) return;   /* out of memory: insertion silently dropped */

      /* Push the fresh pool onto the buffer list */
      pNewBuffer->pNext = pHashMap->pBuffer;
      pNewBuffer->nFreeEntryIdx = 0;
      pHashMap->pBuffer = pNewBuffer;
   }

   /* pBucket points at the chain's terminating NULL link here, so this
    * appends the new entry at the end of the bucket chain */
   *pBucket = &pHashMap->pBuffer->value[pHashMap->pBuffer->nFreeEntryIdx++];
   (*pBucket)->pNext = NULL;
   (*pBucket)->key = key;
   (*pBucket)->value = value;
}
/**
 * Get value for key
 *
 * Looks the key up in its bucket chain and, when present, stores the
 * associated value through pValue.
 *
 * @param pHashMap hashmap
 * @param key key to get value for
 * @param pValue pointer to where to store value if found
 *
 * @return 0 if found, nonzero if not found
 */
int lzsa_hashmap_find(lzsa_hashmap_t *pHashMap, unsigned long long key, unsigned int *pValue) {
   const lzsa_hashvalue_t *pEntry;

   /* Only a read-traversal is needed, so a plain entry pointer suffices */
   for (pEntry = pHashMap->pBucket[lzsa_hashmap_get_hash(key)]; pEntry; pEntry = pEntry->pNext) {
      if (pEntry->key == key) {
         *pValue = pEntry->value;
         return 0;
      }
   }

   return -1;
}
/**
 * Clear hashmap
 *
 * Frees every entry pool buffer owned by the map and resets all bucket
 * heads to NULL, returning the map to its freshly-initialized state.
 *
 * @param pHashMap hashmap
 */
void lzsa_hashmap_clear(lzsa_hashmap_t *pHashMap) {
   while (pHashMap->pBuffer) {
      lzsa_hashbuffer_t *pCurBuffer = pHashMap->pBuffer;

      /* Unlink the buffer before freeing it so the list head never dangles.
       * (The previous 'pCurBuffer = NULL;' after free() was a dead store on a
       * loop-local and has been removed.) */
      pHashMap->pBuffer = pCurBuffer->pNext;
      free(pCurBuffer);
   }

   memset(pHashMap->pBucket, 0, sizeof(lzsa_hashvalue_t *) * LZSA_HASH_NBUCKETS);
}

View File

@ -1,99 +0,0 @@
/*
* hashmap.h - integer hashmap definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _HASHMAP_H
#define _HASHMAP_H
#include <stdlib.h>
/** Number of hashmap buckets */
#define LZSA_HASH_NBUCKETS 256
/* Forward definitions */
typedef struct _lzsa_hashvalue_t lzsa_hashvalue_t;
typedef struct _lzsa_hashbuffer_t lzsa_hashbuffer_t;

/** One hashmap bucket entry */
typedef struct _lzsa_hashvalue_t {
   lzsa_hashvalue_t *pNext;      /* next entry chained in the same bucket, or NULL */
   unsigned long long key;       /* key stored in this entry */
   unsigned int value;           /* value associated with the key */
} lzsa_hashvalue_t;

/** One buffer storing hashmap bucket entries */
typedef struct _lzsa_hashbuffer_t {
   lzsa_hashbuffer_t *pNext;     /* next allocated pool buffer, or NULL */
   int nFreeEntryIdx;            /* index of the next unused slot in value[] */
   lzsa_hashvalue_t value[255];  /* pool of entries handed out to bucket chains */
} lzsa_hashbuffer_t;

/** Hashmap */
typedef struct {
   lzsa_hashbuffer_t *pBuffer;                    /* list of entry pool buffers owned by the map */
   lzsa_hashvalue_t *pBucket[LZSA_HASH_NBUCKETS]; /* bucket chain heads */
} lzsa_hashmap_t;
/**
* Initialize hashmap
*
* @param pHashMap hashmap
*/
void lzsa_hashmap_init(lzsa_hashmap_t *pHashMap);
/**
* Set value for key
*
* @param pHashMap hashmap
* @param key key to set value for
* @param value new value
*/
void lzsa_hashmap_insert(lzsa_hashmap_t *pHashMap, unsigned long long key, unsigned int value);
/**
* Get value for key
*
* @param pHashMap hashmap
* @param key key to get value for
* @param pValue pointer to where to store value if found
*
* @return 0 if found, nonzero if not found
*/
int lzsa_hashmap_find(lzsa_hashmap_t *pHashMap, unsigned long long key, unsigned int *pValue);
/**
* Clear hashmap
*
* @param pHashMap hashmap
*/
void lzsa_hashmap_clear(lzsa_hashmap_t *pHashMap);
#endif /* _HASHMAP_H */

View File

@ -48,27 +48,27 @@
extern "C" {
#endif
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
   LZSA_OK = 0,                    /**< Success */
   LZSA_ERROR_SRC,                 /**< Error reading input */
   LZSA_ERROR_DST,                 /**< Error writing output */
   LZSA_ERROR_DICTIONARY,          /**< Error reading dictionary */
   LZSA_ERROR_MEMORY,              /**< Out of memory */

   /* Compression-specific status codes */
   LZSA_ERROR_COMPRESSION,         /**< Internal compression error */
   LZSA_ERROR_RAW_TOOLARGE,        /**< Input is too large to be compressed to a raw block */
   LZSA_ERROR_RAW_UNCOMPRESSED,    /**< Input is incompressible and raw blocks don't support uncompressed data */

   /* Decompression-specific status codes */
   LZSA_ERROR_FORMAT,              /**< Invalid input format or magic number when decompressing */
   LZSA_ERROR_DECOMPRESSION,       /**< Internal decompression error */
} lzsa_status_t;
/* Compression flags */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
#define LZSA_FLAG_RAW_BACKWARD (1<<2) /**< 1 to compress or decompress raw block backward */
/**
 * Reverse bytes in the specified buffer
 *
 * Swaps bytes pairwise from both ends toward the middle; buffers of size
 * 0 or 1 are left untouched.
 *
 * @param pBuffer pointer to buffer whose contents are to be reversed
 * @param nBufferSize size of buffer in bytes
 */
static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
   int nFront, nBack;

   for (nFront = 0, nBack = nBufferSize - 1; nFront < nBack; nFront++, nBack--) {
      const unsigned char tmp = pBuffer[nFront];
      pBuffer[nFront] = pBuffer[nBack];
      pBuffer[nBack] = tmp;
   }
}
#ifdef __cplusplus
}

View File

@ -75,8 +75,8 @@ void divsufsort_destroy(divsufsort_ctx_t *ctx);
/**
* Constructs the suffix array of a given string.
* @param ctx suffix array context
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param T The input string.
* @param SA The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/

View File

@ -31,7 +31,6 @@
*/
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#ifdef _WIN32
@ -42,11 +41,13 @@
#endif
#include "lib.h"
#define OPT_VERBOSE 1
#define OPT_RAW 2
#define OPT_FAVOR_RATIO 4
#define OPT_VERBOSE 1
#define OPT_RAW 2
#define OPT_FAVOR_RATIO 4
#define OPT_RAW_BACKWARD 8
#define OPT_STATS 16
#define TOOL_VERSION "1.0.4"
#define TOOL_VERSION "1.4.1"
/*---------------------------------------------------------------------------*/
@ -100,23 +101,26 @@ static void compression_progress(long long nOriginalSize, long long nCompressedS
static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nCommandCount = 0;
int nCommandCount = 0, nSafeDist = 0;
int nFlags;
lzsa_status_t nStatus;
lzsa_stats stats;
nFlags = 0;
if (nOptions & OPT_FAVOR_RATIO)
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
}
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist, &stats);
if ((nOptions & OPT_VERBOSE)) {
if (nOptions & OPT_VERBOSE) {
nEndTime = do_get_time();
}
@ -135,14 +139,43 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
if (nStatus)
return 100;
if ((nOptions & OPT_VERBOSE)) {
if (nOptions & OPT_VERBOSE) {
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
pszInFilename, fDelta, fSpeed, nCommandCount, (double)nOriginalSize / (double)nCommandCount,
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
if (nOptions & OPT_RAW) {
fprintf(stdout, "Safe distance: %d (0x%X)\n", nSafeDist, nSafeDist);
}
}
if (nOptions & OPT_STATS) {
if (stats.literals_divisor > 0)
fprintf(stdout, "Literals: min: %d avg: %d max: %d count: %d\n", stats.min_literals, stats.total_literals / stats.literals_divisor, stats.max_literals, stats.literals_divisor);
else
fprintf(stdout, "Literals: none\n");
if (stats.match_divisor > 0) {
fprintf(stdout, "Offsets: min: %d avg: %d max: %d reps: %d count: %d\n", stats.min_offset, stats.total_offsets / stats.match_divisor, stats.max_offset, stats.num_rep_offsets, stats.match_divisor);
fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor);
}
else {
fprintf(stdout, "Offsets: none\n");
fprintf(stdout, "Match lens: none\n");
}
if (stats.rle1_divisor > 0) {
fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor);
}
else {
fprintf(stdout, "RLE1 lens: none\n");
}
if (stats.rle2_divisor > 0) {
fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor);
}
else {
fprintf(stdout, "RLE2 lens: none\n");
}
}
return 0;
}
@ -157,6 +190,8 @@ static int do_decompress(const char *pszInFilename, const char *pszOutFilename,
nFlags = 0;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
@ -259,7 +294,7 @@ int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, co
pCompareStream->pCompareDataBuf = NULL;
pCompareStream->nCompareDataSize = 0;
pCompareStream->f = (void*)fopen(pszCompareFilename, pszMode);
pCompareStream->f = (FILE*)fopen(pszCompareFilename, pszMode);
if (pCompareStream->f) {
stream->obj = pCompareStream;
@ -269,8 +304,10 @@ int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, co
stream->close = comparestream_close;
return 0;
}
else
else {
free(pCompareStream);
return -1;
}
}
static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
@ -305,6 +342,8 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con
nFlags = 0;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
@ -344,13 +383,13 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con
/*---------------------------------------------------------------------------*/
static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
static void generate_compressible_data(unsigned char *pBuffer, const size_t nBufferSize, const int nMinMatchSize, const unsigned int nSeed, const int nNumLiteralValues, const float fMatchProbability) {
size_t nIndex = 0;
int nMatchProbability = (int)(fMatchProbability * 1023.0f);
const int nMatchProbability = (int)(fMatchProbability * 1023.0f);
srand(nSeed);
if (nIndex >= nBufferSize) return;
if (nBufferSize == 0) return;
pBuffer[nIndex++] = rand() % nNumLiteralValues;
while (nIndex < nBufferSize) {
@ -384,14 +423,12 @@ static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSiz
}
}
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
static void xor_data(unsigned char *pBuffer, const size_t nBufferSize, const unsigned int nSeed, const float fXorProbability) {
size_t nIndex = 0;
int nXorProbability = (int)(fXorProbability * 1023.0f);
const int nXorProbability = (const int)(fXorProbability * 1023.0f);
srand(nSeed);
if (nIndex >= nBufferSize) return;
while (nIndex < nBufferSize) {
if ((rand() & 1023) < nXorProbability) {
pBuffer[nIndex] ^= 0xff;
@ -400,7 +437,7 @@ static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nS
}
}
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
unsigned char *pGeneratedData;
unsigned char *pCompressedData;
unsigned char *pTmpCompressedData;
@ -416,6 +453,8 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
if (!pGeneratedData) {
@ -429,7 +468,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
return 100;
}
@ -440,7 +479,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
return 100;
}
@ -470,11 +509,11 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
size_t nDataSizeStep = 128;
float fProbabilitySizeStep = 0.0005f;
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) {
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((size_t)((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE))); nGeneratedDataSize += nDataSizeStep) {
float fMatchProbability;
fprintf(stdout, "size %zd", nGeneratedDataSize);
for (fMatchProbability = ((nOptions & OPT_RAW) ? 0.5f : 0); fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
fprintf(stdout, "size %zu", nGeneratedDataSize);
for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
float fXorProbability;
@ -488,7 +527,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
/* Try to compress it, expected to succeed */
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
nFlags, nMinMatchSize, nFormatVersion);
if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
if (nActualCompressedSize == (size_t)-1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -498,7 +537,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error compressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -506,7 +545,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
size_t nActualDecompressedSize;
int nDecFormatVersion = nFormatVersion;
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion);
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -516,7 +555,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error decompressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -530,7 +569,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
free(pGeneratedData);
pGeneratedData = NULL;
fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
return 100;
}
@ -587,6 +626,8 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
if (pszDictionaryFilename) {
fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
@ -608,7 +649,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
pFileData = (unsigned char*)malloc(nFileSize);
if (!pFileData) {
fclose(f_in);
fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
return 100;
}
@ -628,7 +669,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
if (!pCompressedData) {
free(pFileData);
fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
fprintf(stderr, "out of memory for compressing '%s', %zu bytes needed\n", pszInFilename, nMaxCompressedSize);
return 100;
}
@ -650,7 +691,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
long long t0 = do_get_time();
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
long long t1 = do_get_time();
if (nActualCompressedSize == -1) {
if (nActualCompressedSize == (size_t)-1) {
free(pCompressedData);
free(pFileData);
fprintf(stderr, "compression error\n");
@ -658,7 +699,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
}
long long nCurDecTime = t1 - t0;
if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
if (nBestCompTime == (size_t)-1 || nBestCompTime > nCurDecTime)
nBestCompTime = nCurDecTime;
/* Check guard bytes before the output buffer */
@ -699,7 +740,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
free(pCompressedData);
free(pFileData);
fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
fprintf(stdout, "compressed size: %zu bytes\n", nActualCompressedSize);
fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));
return 0;
@ -717,6 +758,8 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
nFlags = 0;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_RAW_BACKWARD)
nFlags |= LZSA_FLAG_RAW_BACKWARD;
if (pszDictionaryFilename) {
fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
@ -738,7 +781,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
pFileData = (unsigned char*)malloc(nFileSize);
if (!pFileData) {
fclose(f_in);
fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
fprintf(stderr, "out of memory for reading '%s', %zu bytes needed\n", pszInFilename, nFileSize);
return 100;
}
@ -757,7 +800,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
nMaxDecompressedSize = 65536;
else
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
if (nMaxDecompressedSize == -1) {
if (nMaxDecompressedSize == (size_t)-1) {
free(pFileData);
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
return 100;
@ -766,7 +809,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize);
if (!pDecompressedData) {
free(pFileData);
fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize);
fprintf(stderr, "out of memory for decompressing '%s', %zu bytes needed\n", pszInFilename, nMaxDecompressedSize);
return 100;
}
@ -779,7 +822,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
long long t0 = do_get_time();
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion);
long long t1 = do_get_time();
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pDecompressedData);
free(pFileData);
fprintf(stderr, "decompression error\n");
@ -787,7 +830,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
}
long long nCurDecTime = t1 - t0;
if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
if (nBestDecTime == (size_t)-1 || nBestDecTime > nCurDecTime)
nBestDecTime = nCurDecTime;
}
@ -807,7 +850,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
free(pFileData);
fprintf(stdout, "format: LZSA%d\n", nFormatVersion);
fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize);
fprintf(stdout, "decompressed size: %zu bytes\n", nActualDecompressedSize);
fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0));
return 0;
@ -820,11 +863,11 @@ int main(int argc, char **argv) {
const char *pszInFilename = NULL;
const char *pszOutFilename = NULL;
const char *pszDictionaryFilename = NULL;
bool bArgsError = false;
bool bCommandDefined = false;
bool bVerifyCompression = false;
bool bMinMatchDefined = false;
bool bFormatVersionDefined = false;
int nArgsError = 0;
int nCommandDefined = 0;
int nVerifyCompression = 0;
int nMinMatchDefined = 0;
int nFormatVersionDefined = 0;
char cCommand = 'z';
int nMinMatchSize = 0;
unsigned int nOptions = OPT_FAVOR_RATIO;
@ -832,51 +875,51 @@ int main(int argc, char **argv) {
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-d")) {
if (!bCommandDefined) {
bCommandDefined = true;
if (!nCommandDefined) {
nCommandDefined = 1;
cCommand = 'd';
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-z")) {
if (!bCommandDefined) {
bCommandDefined = true;
if (!nCommandDefined) {
nCommandDefined = 1;
cCommand = 'z';
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-c")) {
if (!bVerifyCompression) {
bVerifyCompression = true;
if (!nVerifyCompression) {
nVerifyCompression = 1;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-cbench")) {
if (!bCommandDefined) {
bCommandDefined = true;
if (!nCommandDefined) {
nCommandDefined = 1;
cCommand = 'B';
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-dbench")) {
if (!bCommandDefined) {
bCommandDefined = true;
if (!nCommandDefined) {
nCommandDefined = 1;
cCommand = 'b';
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-test")) {
if (!bCommandDefined) {
bCommandDefined = true;
if (!nCommandDefined) {
nCommandDefined = 1;
cCommand = 't';
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-D")) {
if (!pszDictionaryFilename && (i + 1) < argc) {
@ -884,105 +927,119 @@ int main(int argc, char **argv) {
i++;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strncmp(argv[i], "-D", 2)) {
if (!pszDictionaryFilename) {
pszDictionaryFilename = argv[i] + 2;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-m")) {
if (!bMinMatchDefined && (i + 1) < argc) {
if (!nMinMatchDefined && (i + 1) < argc) {
char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10);
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
i++;
bMinMatchDefined = true;
nMinMatchDefined = 1;
nOptions &= (~OPT_FAVOR_RATIO);
}
else {
bArgsError = true;
nArgsError = 1;
}
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strncmp(argv[i], "-m", 2)) {
if (!bMinMatchDefined) {
if (!nMinMatchDefined) {
char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
bMinMatchDefined = true;
nMinMatchDefined = 1;
nOptions &= (~OPT_FAVOR_RATIO);
}
else {
bArgsError = true;
nArgsError = 1;
}
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "--prefer-ratio")) {
if (!bMinMatchDefined) {
if (!nMinMatchDefined) {
nMinMatchSize = 0;
bMinMatchDefined = true;
nMinMatchDefined = 1;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "--prefer-speed")) {
if (!bMinMatchDefined) {
if (!nMinMatchDefined) {
nMinMatchSize = 3;
nOptions &= (~OPT_FAVOR_RATIO);
bMinMatchDefined = true;
nMinMatchDefined = 1;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-f")) {
if (!bFormatVersionDefined && (i + 1) < argc) {
if (!nFormatVersionDefined && (i + 1) < argc) {
char *pEnd = NULL;
nFormatVersion = (int)strtol(argv[i + 1], &pEnd, 10);
if (pEnd && pEnd != argv[i + 1] && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
i++;
bFormatVersionDefined = true;
nFormatVersionDefined = 1;
}
else {
bArgsError = true;
nArgsError = 1;
}
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strncmp(argv[i], "-f", 2)) {
if (!bFormatVersionDefined) {
if (!nFormatVersionDefined) {
char *pEnd = NULL;
nFormatVersion = (int)strtol(argv[i] + 2, &pEnd, 10);
if (pEnd && pEnd != (argv[i] + 2) && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
bFormatVersionDefined = true;
nFormatVersionDefined = 1;
}
else {
bArgsError = true;
nArgsError = 1;
}
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-v")) {
if ((nOptions & OPT_VERBOSE) == 0) {
nOptions |= OPT_VERBOSE;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-r")) {
if ((nOptions & OPT_RAW) == 0) {
nOptions |= OPT_RAW;
}
else
bArgsError = true;
nArgsError = 1;
}
else if (!strcmp(argv[i], "-b")) {
if ((nOptions & OPT_RAW_BACKWARD) == 0) {
nOptions |= OPT_RAW_BACKWARD;
}
else
nArgsError = 1;
}
else if (!strcmp(argv[i], "-stats")) {
if ((nOptions & OPT_STATS) == 0) {
nOptions |= OPT_STATS;
}
else
nArgsError = 1;
}
else {
if (!pszInFilename)
@ -991,26 +1048,33 @@ int main(int argc, char **argv) {
if (!pszOutFilename)
pszOutFilename = argv[i];
else
bArgsError = true;
nArgsError = 1;
}
}
}
if (!bArgsError && cCommand == 't') {
if (!nArgsError && (nOptions & OPT_RAW_BACKWARD) && !(nOptions & OPT_RAW)) {
fprintf(stderr, "error: -b (compress backwards) requires -r (raw block format)\n");
return 100;
}
if (!nArgsError && cCommand == 't') {
return do_self_test(nOptions, nMinMatchSize, nFormatVersion);
}
if (bArgsError || !pszInFilename || !pszOutFilename) {
if (nArgsError || !pszInFilename || !pszOutFilename) {
fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
fprintf(stderr, " -c: check resulting stream after compressing\n");
fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -cbench: benchmary in-memory compression\n");
fprintf(stderr, " -dbench: benchmary in-memory decompression\n");
fprintf(stderr, " -cbench: benchmark in-memory compression\n");
fprintf(stderr, " -dbench: benchmark in-memory decompression\n");
fprintf(stderr, " -test: run automated self-tests\n");
fprintf(stderr, " -stats: show compressed data stats\n");
fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -f <value>: LZSA compression format (1-2)\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
fprintf(stderr, " -b: compress backward (requires -r and a backward decompressor)\n");
fprintf(stderr, " -D <filename>: use dictionary file\n");
fprintf(stderr, " -m <value>: minimum match size (3-5) (default: 3)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
@ -1022,8 +1086,10 @@ int main(int argc, char **argv) {
if (cCommand == 'z') {
int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
if (nResult == 0 && bVerifyCompression) {
nResult = do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
if (nResult == 0 && nVerifyCompression) {
return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
} else {
return nResult;
}
}
else if (cCommand == 'd') {

View File

@ -33,7 +33,17 @@
#include <string.h>
#include "matchfinder.h"
#include "format.h"
#include "lib.h"
/**
 * Hash an index value down to a TAG_BITS-wide tag.
 *
 * Uses Knuth's multiplicative (Fibonacci) hashing: the multiplier is
 * 2^64 divided by the golden ratio, and the top TAG_BITS bits of the
 * 64-bit product are kept as the tag.
 *
 * @param nIndex index value to hash
 *
 * @return TAG_BITS-bit hash of the index
 */
static inline int lzsa_get_index_tag(unsigned int nIndex) {
   const unsigned long long nProduct = (unsigned long long)nIndex * 11400714819323198485ULL;
   return (int)(nProduct >> (64ULL - TAG_BITS));
}
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
@ -55,7 +65,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i;
int i, r;
/* Compute the permuted LCP first (Kärkkäinen method) */
Phi[intervals[0]] = -1;
@ -66,7 +76,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
const int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
@ -77,18 +87,33 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
const int nMinMatchSize = pCompressor->min_match_size;
if (pCompressor->format_version >= 2) {
for (i = 1; i < nInWindowSize; i++) {
const int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
int nTaggedLen = 0;
if (nLen)
nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
}
}
else {
for (i = 1; i < nInWindowSize; i++) {
const int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_AND_TAG_MAX)
nLen = LCP_AND_TAG_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
}
if (i < nInWindowSize)
intervals[i] &= POS_MASK;
/**
* Build intervals for finding matches
@ -106,7 +131,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
intervals[0] = 0;
next_interval_idx = 1;
for (int r = 1; r < nInWindowSize; r++) {
for (r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
@ -166,16 +191,19 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
unsigned int nPrevOffset = 0;
unsigned char nV1OffsetFound[2] = { 0, 0 };
/**
* Find matches using intervals
@ -209,19 +237,70 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref & EXCL_VISITED_MASK;
matchptr = pMatches;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
for (;;) {
if ((super_ref = pos_data[match_pos]) > ref) {
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) {
matchptr->length = (const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
else {
unsigned int nV1OffsetType = (nMatchOffset <= 256) ? 0 : 1;
if (!nV1OffsetFound[nV1OffsetType]) {
matchptr->length = (const unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (const unsigned short)nMatchOffset;
if (matchptr->length < 256)
nV1OffsetFound[nV1OffsetType] = 1;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
@ -229,6 +308,24 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
const unsigned short nMatchLen = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)));
if (nMatchLen > 2) {
matchptr->length = nMatchLen | 0x8000;
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
}
return (int)(matchptr - pMatches);
@ -248,40 +345,29 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0);
lzsa_find_matches_at(pCompressor, i, &match, 0, 0);
}
}
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
* Find all matches for the data to be compressed
*
* @param pCompressor compression context
* @param nMatchesPerOffset maximum number of matches to store for each offset
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match;
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
int m;
const int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset, nEndOffset - nStartOffset);
for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
pMatch->length = 0;
pMatch->offset = 0;
}
else {
int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
if (nMaxLen < 0)
nMaxLen = 0;
if (pMatch->length > nMaxLen)
pMatch->length = (unsigned short)nMaxLen;
}
pMatch++;
if (nMatches < nMatchesPerOffset) {
memset(pMatch + nMatches, 0, (nMatchesPerOffset - nMatches) * sizeof(lzsa_match));
}
pMatch += nMatchesPerOffset;
}
}

View File

@ -33,14 +33,12 @@
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
@ -52,18 +50,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
*/
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
*
* @return number of matches
*/
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
/**
* Skip previously compressed bytes
*
@ -74,14 +60,14 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
* Find all matches for the data to be compressed
*
* @param pCompressor compression context
* @param nMatchesPerOffset maximum number of matches to store for each offset
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOffset, const int nStartOffset, const int nEndOffset);
#ifdef __cplusplus
}

View File

@ -1,5 +1,5 @@
/*
* shrink_v1.c - LZSA1 block compressor implementation
* shrink_block_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -67,7 +67,7 @@ static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, const int nLength) {
if (nLength >= LITERALS_RUN_LEN_V1) {
if (nLength < 256)
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
@ -118,7 +118,7 @@ static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*/
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, const int nLength) {
if (nLength >= MATCH_RUN_LEN_V1) {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
@ -139,110 +139,148 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf
}
/**
 * Get offset encoding cost in bits
 *
 * LZSA1 stores a match offset as either one byte (offsets up to 256) or
 * two bytes (larger offsets), selected by a flag bit in the token.
 *
 * @param _nMatchOffset offset to get cost of
 *
 * @return cost in bits (8 for offsets <= 256, 16 otherwise)
 */
#define lzsa_get_offset_cost_v1(_nMatchOffset) (((_nMatchOffset) <= 256) ? 8 : 16)
/**
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nReduce non-zero to reduce the number of tokens when the path costs are equal, zero not to
*/
static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset, const int nReduce) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1);
const int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i;
cost[nEndOffset - 1] = 8;
nLastLiteralsOffset = nEndOffset;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nBestCost, nBestMatchLen, nBestMatchOffset;
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V1); i += NARRIVALS_PER_POSITION_V1) {
lzsa_arrival* cur_arrival = &arrival[i];
int j;
int nLiteralsLen = nLastLiteralsOffset - i;
nBestCost = 8 + cost[i + 1];
if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nBestCost += 8;
}
if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nBestCost += MODESWITCH_PENALTY;
nBestMatchLen = 0;
nBestMatchOffset = 0;
memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V1);
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
for (j = 0; j < NARRIVALS_PER_POSITION_V1; j++)
cur_arrival[j].cost = 0x40000000;
}
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].cost = 0;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].from_slot = -1;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
int nCurCost;
int nMatchLen = pMatch[m].length;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1];
lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT_V1];
int j, m;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
const int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
const int nScore = cur_arrival[j].score + 1;
const int nNumLiterals = cur_arrival[j].num_literals + 1;
int n;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + nMatchLen];
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
else if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
else {
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
if (nCodingChoiceCost < pDestLiteralSlots[n].cost ||
(nCodingChoiceCost == pDestLiteralSlots[n].cost && nScore < (pDestLiteralSlots[n].score + nDisableScore))) {
memmove(&pDestLiteralSlots[n + 1],
&pDestLiteralSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V1)
nMatchRunLen = MATCH_RUN_LEN_V1;
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
for (; k <= nMatchLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
lzsa_arrival* pDestArrival = &pDestLiteralSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i - nStartOffset;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
break;
}
}
}
if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
nLastLiteralsOffset = i;
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
const int nNumArrivalsForThisPos = j;
cost[i] = nBestCost;
pMatch->length = nBestMatchLen;
pMatch->offset = nBestMatchOffset;
if (nNumArrivalsForThisPos != 0) {
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
lzsa_arrival* pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V1];
int nCodingChoiceCost = cur_arrival[0].cost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0, n;
if (!cur_arrival[0].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
for (n = 0;
n < NARRIVALS_PER_POSITION_V1 && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
n++) {
if (lzsa_get_offset_cost_v1(pDestSlots[n].rep_offset) == nMatchOffsetCost) {
exists = 1;
break;
}
}
if (!exists) {
const int nScore = cur_arrival[0].score + 5;
if (nCodingChoiceCost < pDestSlots[0].cost ||
(nCodingChoiceCost == pDestSlots[0].cost && nScore < (pDestSlots[0].score + nDisableScore))) {
memmove(&pDestSlots[1],
&pDestSlots[0],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - 1));
pDestSlots->cost = nCodingChoiceCost;
pDestSlots->rep_offset = match[m].offset;
pDestSlots->from_slot = 1;
pDestSlots->from_pos = i - nStartOffset;
pDestSlots->match_len = k;
pDestSlots->num_literals = 0;
pDestSlots->score = nScore;
}
}
}
}
}
}
const lzsa_arrival *end_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1];
lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
while (end_arrival->from_slot > 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) {
pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len;
pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset: 0;
end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V1) + (end_arrival->from_slot - 1)];
}
}
@ -251,80 +289,102 @@ static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nSt
* impacting the compression ratio
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset) {
lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < nEndOffset &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + 1].length < MAX_VARLEN &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
const int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
const int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
pBestMatch[i].offset = pBestMatch[i + 1].offset;
pBestMatch[i + 1].length = 0;
pBestMatch[i + 1].offset = 0;
nDidReduce = 1;
continue;
}
}
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchLen = pMatch->length;
int nReduce = 0;
if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
(i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
int nNextIndex = i + pMatch->length;
int nNextLiterals = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < MIN_MATCH_SIZE_V1) {
nNextLiterals++;
nNextIndex++;
}
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
(8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
/* Reduce */
const int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
pBestMatch[i + j].length = 0;
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
}
else {
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
/* Join */
pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
nDidReduce = 1;
continue;
}
nNumLiterals = 0;
i += nMatchLen;
}
if ((i + pMatch->length) < nEndOffset && pMatch->offset && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset &&
pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
(pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
(i + pMatch->length) >= pMatch->offset &&
(i + pMatch->length) >= pBestMatch[i + pMatch->length].offset &&
(i + pMatch->length + pBestMatch[i + pMatch->length].length) <= nEndOffset &&
!memcmp(pInWindow + i - pMatch->offset + pMatch->length,
pInWindow + i + pMatch->length - pBestMatch[i + pMatch->length].offset,
pBestMatch[i + pMatch->length].length)) {
int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v1(0) + */ ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
if (nCurPartialSize >= nReducedPartialSize) {
const int nMatchLen = pMatch->length;
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].length = 0;
pBestMatch[i + nMatchLen].offset = 0;
nDidReduce = 1;
continue;
}
}
i += pMatch->length;
nNumLiterals = 0;
}
else {
nNumLiterals++;
@ -348,22 +408,23 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const in
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
const lzsa_match *pBestMatch = pCompressor->best_match - nStartOffset;
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
const int nMatchOffset = pMatch->offset;
const int nMatchLen = pMatch->length;
const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -373,6 +434,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
@ -384,8 +452,45 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
pCompressor->stats.min_offset = nMatchOffset;
if (nMatchOffset > pCompressor->stats.max_offset)
pCompressor->stats.max_offset = nMatchOffset;
pCompressor->stats.total_offsets += nMatchOffset;
if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
pCompressor->stats.min_match_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_match_len)
pCompressor->stats.max_match_len = nMatchLen;
pCompressor->stats.total_match_lens += nMatchLen;
pCompressor->stats.match_divisor++;
if (nMatchOffset == 1) {
if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
pCompressor->stats.min_rle1_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle1_len)
pCompressor->stats.max_rle1_len = nMatchLen;
pCompressor->stats.total_rle1_lens += nMatchLen;
pCompressor->stats.rle1_divisor++;
}
else if (nMatchOffset == 2) {
if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
pCompressor->stats.min_rle2_len = nMatchLen;
if (nMatchLen > pCompressor->stats.max_rle2_len)
pCompressor->stats.max_rle2_len = nMatchLen;
pCompressor->stats.total_rle2_lens += nMatchLen;
pCompressor->stats.rle2_divisor++;
}
i += nMatchLen;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
const int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
}
else {
@ -397,8 +502,8 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -406,13 +511,25 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) /* | 0x00 */;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
pCompressor->stats.min_literals = nNumLiterals;
if (nNumLiterals > pCompressor->stats.max_literals)
pCompressor->stats.max_literals = nNumLiterals;
pCompressor->stats.total_literals += nNumLiterals;
pCompressor->stats.literals_divisor++;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
const int nCurSafeDist = (i - nStartOffset) - nOutOffset;
if (nCurSafeDist >= 0 && pCompressor->safe_dist < nCurSafeDist)
pCompressor->safe_dist = nCurSafeDist;
}
pCompressor->num_commands++;
@ -446,11 +563,11 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int nNumLiterals = nEndOffset - nStartOffset;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nNumLiterals = nEndOffset - nStartOffset;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nOutOffset = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -462,7 +579,6 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
@ -482,8 +598,8 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
@ -492,17 +608,26 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nResult;
lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
/* Compress optimally without breaking ties in favor of less tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
if (nInDataSize < 65536) {
lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
}
else {
lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
}
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nResult = lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
if (nResult < 0 && pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
nResult = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}

View File

@ -1,5 +1,5 @@
/*
* shrink_v1.h - LZSA1 block compressor definitions
* shrink_block_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -33,21 +33,28 @@
#ifndef _SHRINK_BLOCK_V1_H
#define _SHRINK_BLOCK_V1_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_BLOCK_V1_H */

File diff suppressed because it is too large Load Diff

View File

@ -1,5 +1,5 @@
/*
* shrink_v2.h - LZSA2 block compressor definitions
* shrink_block_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -33,21 +33,28 @@
#ifndef _SHRINK_BLOCK_V2_H
#define _SHRINK_BLOCK_V2_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
#include "shrink_context.h"
#ifdef __cplusplus
extern "C" {
#endif
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_BLOCK_V2_H */

View File

@ -37,6 +37,7 @@
#include "shrink_block_v2.h"
#include "format.h"
#include "matchfinder.h"
#include "lib.h"
/**
* Initialize compression context
@ -44,34 +45,44 @@
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
int nResult;
int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
int nMaxMinMatchForFormat = (nFormatVersion == 1) ? 5 : 3;
const int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
const int nMaxMinMatchForFormat = (nFormatVersion == 1) ? 5 : 3;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->selected_match = NULL;
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->slot_cost = NULL;
pCompressor->repmatch_opt = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_slot_handled_mask = NULL;
pCompressor->rep_len_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->offset_cache = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
else if (pCompressor->min_match_size > nMaxMinMatchForFormat)
pCompressor->min_match_size = nMaxMinMatchForFormat;
pCompressor->max_forward_depth = 0;
pCompressor->format_version = nFormatVersion;
pCompressor->flags = nFlags;
pCompressor->safe_dist = 0;
pCompressor->num_commands = 0;
memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
pCompressor->stats.min_literals = -1;
pCompressor->stats.min_match_len = -1;
pCompressor->stats.min_offset = -1;
pCompressor->stats.min_rle1_len = -1;
pCompressor->stats.min_rle2_len = -1;
if (!nResult) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
@ -80,37 +91,43 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT_V2) * sizeof(lzsa_arrival));
if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->selected_match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->selected_match) {
pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
if (pCompressor->best_match) {
pCompressor->improved_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
if (pCompressor->improved_match) {
pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
if (pCompressor->slot_cost) {
pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
if (pCompressor->repmatch_opt)
return 0;
if (pCompressor->best_match) {
if (pCompressor->format_version == 2)
pCompressor->match = (lzsa_match*)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V2 * sizeof(lzsa_match));
else
pCompressor->match = (lzsa_match*)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_slot_handled_mask = (unsigned char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(unsigned char));
if (pCompressor->rep_slot_handled_mask) {
pCompressor->rep_len_handled_mask = (unsigned char*)malloc(((LCP_MAX + 1) / 8) * sizeof(unsigned char));
if (pCompressor->rep_len_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
pCompressor->offset_cache = (int*)malloc(2048 * sizeof(int));
if (pCompressor->offset_cache) {
return 0;
}
}
}
}
}
}
else {
return 0;
}
}
}
else {
return 0;
}
}
}
}
@ -129,29 +146,29 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->repmatch_opt) {
free(pCompressor->repmatch_opt);
pCompressor->repmatch_opt = NULL;
if (pCompressor->offset_cache) {
free(pCompressor->offset_cache);
pCompressor->offset_cache = NULL;
}
if (pCompressor->slot_cost) {
free(pCompressor->slot_cost);
pCompressor->slot_cost = NULL;
if (pCompressor->next_offset_for_pos) {
free(pCompressor->next_offset_for_pos);
pCompressor->next_offset_for_pos = NULL;
}
if (pCompressor->improved_match) {
free(pCompressor->improved_match);
pCompressor->improved_match = NULL;
if (pCompressor->first_offset_for_byte) {
free(pCompressor->first_offset_for_byte);
pCompressor->first_offset_for_byte = NULL;
}
if (pCompressor->best_match) {
free(pCompressor->best_match);
pCompressor->best_match = NULL;
if (pCompressor->rep_len_handled_mask) {
free(pCompressor->rep_len_handled_mask);
pCompressor->rep_len_handled_mask = NULL;
}
if (pCompressor->selected_match) {
free(pCompressor->selected_match);
pCompressor->selected_match = NULL;
if (pCompressor->rep_slot_handled_mask) {
free(pCompressor->rep_slot_handled_mask);
pCompressor->rep_slot_handled_mask = NULL;
}
if (pCompressor->match) {
@ -159,6 +176,16 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
pCompressor->match = NULL;
}
if (pCompressor->arrival) {
free(pCompressor->arrival);
pCompressor->arrival = NULL;
}
if (pCompressor->best_match) {
free(pCompressor->best_match);
pCompressor->best_match = NULL;
}
if (pCompressor->open_intervals) {
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
@ -187,23 +214,43 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
return -1;
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nCompressedSize;
if (pCompressor->format_version == 1) {
return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
}
else if (pCompressor->format_version == 2) {
return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);
}
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
nCompressedSize = -1;
else {
return -1;
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, (pCompressor->format_version == 2) ? NMATCHES_PER_INDEX_V2 : NMATCHES_PER_INDEX_V1, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->format_version == 1) {
nCompressedSize = lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData, nCompressedSize);
}
}
else if (pCompressor->format_version == 2) {
nCompressedSize = lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
if (nCompressedSize != -1 && (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD)) {
lzsa_reverse_buffer(pOutData, nCompressedSize);
}
}
else {
nCompressedSize = -1;
}
}
if (pCompressor->flags & LZSA_FLAG_RAW_BACKWARD) {
lzsa_reverse_buffer(pInWindow + nPreviousBlockSize, nInDataSize);
}
return nCompressedSize;
}
/**

View File

@ -34,29 +34,38 @@
#define _SHRINK_CONTEXT_H
#include "divsufsort.h"
#include "hashmap.h"
#ifdef __cplusplus
extern "C" {
#endif
#define LCP_BITS 14
#define LCP_MAX (1U<<(LCP_BITS - 1))
#define TAG_BITS 4
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (31-LCP_BITS)
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3
#define NARRIVALS_PER_POSITION_V1 8
#define NARRIVALS_PER_POSITION_V2_SMALL 9
#define NARRIVALS_PER_POSITION_V2_BIG 32
#define NARRIVALS_PER_POSITION_V2_MAX 64
#define ARRIVALS_PER_POSITION_SHIFT_V1 3
#define ARRIVALS_PER_POSITION_SHIFT_V2 6
#define LEAVE_ALONE_MATCH_SIZE 1000
#define NMATCHES_PER_INDEX_V1 16
#define MATCHES_PER_INDEX_SHIFT_V1 4
#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1
#define NMATCHES_PER_INDEX_V2 64
#define MATCHES_PER_INDEX_SHIFT_V2 6
#define MODESWITCH_PENALTY 1
#define LEAVE_ALONE_MATCH_SIZE 300
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
#define MODESWITCH_PENALTY 3
/** One match */
typedef struct _lzsa_match {
@ -64,12 +73,48 @@ typedef struct _lzsa_match {
unsigned short offset;
} lzsa_match;
/** One rep-match slot (for LZSA2) */
typedef struct _lzsa_repmatch_opt {
int incoming_offset;
short best_slot_for_incoming;
short expected_repmatch;
} lzsa_repmatch_opt;
/** Forward arrival slot */
typedef struct _lzsa_arrival {
int cost;
unsigned short rep_offset;
short from_slot;
unsigned short from_pos;
unsigned short rep_len;
unsigned short match_len;
unsigned short num_literals;
int rep_pos;
int score;
} lzsa_arrival;
/** Compression statistics */
typedef struct _lzsa_stats {
int min_literals;
int max_literals;
int total_literals;
int min_offset;
int max_offset;
int num_rep_offsets;
int total_offsets;
int min_match_len;
int max_match_len;
int total_match_lens;
int min_rle1_len;
int max_rle1_len;
int total_rle1_lens;
int min_rle2_len;
int max_rle2_len;
int total_rle2_lens;
int literals_divisor;
int match_divisor;
int rle1_divisor;
int rle2_divisor;
} lzsa_stats;
/** Compression context */
typedef struct _lzsa_compressor {
@ -78,17 +123,19 @@ typedef struct _lzsa_compressor {
unsigned int *pos_data;
unsigned int *open_intervals;
lzsa_match *match;
lzsa_match *selected_match;
lzsa_match *best_match;
lzsa_match *improved_match;
int *slot_cost;
lzsa_repmatch_opt *repmatch_opt;
lzsa_arrival *arrival;
unsigned char *rep_slot_handled_mask;
unsigned char *rep_len_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int *offset_cache;
int min_match_size;
int max_forward_depth;
int format_version;
int flags;
int safe_dist;
int num_commands;
lzsa_hashmap_t cost_map;
lzsa_stats stats;
} lzsa_compressor;
/**
@ -97,6 +144,7 @@ typedef struct _lzsa_compressor {
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
@ -122,7 +170,7 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
* Get the number of compression commands issued in compressed data blocks

View File

@ -62,7 +62,7 @@ size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
*
* @return actual compressed size, or -1 for error
*/
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
lzsa_compressor compressor;
size_t nOriginalSize = 0;
@ -84,21 +84,6 @@ size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutB
}
}
if ((compressor.flags & LZSA_FLAG_FAVOR_RATIO)) {
if (nInputSize < 16384)
compressor.max_forward_depth = 25;
else {
if (nInputSize < 32768)
compressor.max_forward_depth = 15;
else {
if (nInputSize < BLOCK_SIZE)
compressor.max_forward_depth = 10;
else
compressor.max_forward_depth = 0;
}
}
}
int nPreviousBlockSize = 0;
int nNumBlocks = 0;
@ -157,7 +142,7 @@ size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutB
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
nError = LZSA_ERROR_DST;
else {
memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);

View File

@ -61,7 +61,7 @@ size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
*
* @return actual compressed size, or -1 for error
*/
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
#ifdef __cplusplus

View File

@ -70,11 +70,13 @@ static void lzsa_delete_file(const char *pszInFilename) {
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
lzsa_stream_t inStream, outStream;
void *pDictionaryData = NULL;
int nDictionaryDataSize = 0;
@ -98,7 +100,7 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
return nStatus;
}
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount);
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats);
lzsa_dictionary_free(&pDictionaryData);
outStream.close(&outStream);
@ -127,12 +129,14 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
unsigned char *pInData, *pOutData;
lzsa_compressor compressor;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
@ -200,21 +204,6 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
}
nDictionaryDataSize = 0;
if (nNumBlocks == 0 && (compressor.flags & LZSA_FLAG_FAVOR_RATIO)) {
if (nInDataSize < 16384)
compressor.max_forward_depth = 25;
else {
if (nInDataSize < 32768)
compressor.max_forward_depth = 15;
else {
if (nInDataSize < BLOCK_SIZE)
compressor.max_forward_depth = 10;
else
compressor.max_forward_depth = 0;
}
}
}
int nOutDataSize;
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, ((nInDataSize + nRawPadding) >= BLOCK_SIZE) ? BLOCK_SIZE : (nInDataSize + nRawPadding));
@ -301,6 +290,11 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
progress(nOriginalSize, nCompressedSize);
int nCommandCount = lzsa_compressor_get_command_count(&compressor);
int nSafeDist = compressor.safe_dist;
if (pStats)
*pStats = compressor.stats;
lzsa_compressor_destroy(&compressor);
free(pOutData);
@ -319,6 +313,8 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
*pCompressedSize = nCompressedSize;
if (pCommandCount)
*pCommandCount = nCommandCount;
if (pSafeDist)
*pSafeDist = nSafeDist;
return LZSA_OK;
}
}

View File

@ -33,15 +33,13 @@
#ifndef _SHRINK_STREAMING_H
#define _SHRINK_STREAMING_H
#include "shrink_context.h"
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**
@ -57,12 +55,14 @@ typedef enum _lzsa_status_t lzsa_status_t;
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to the returned safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
/*-------------- Streaming API -------------- */
@ -80,12 +80,14 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to the returned safe distance for raw blocks, updated when this function is successful
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
#ifdef __cplusplus
}

View File

@ -34,13 +34,17 @@
#include <stdlib.h>
#include <string.h>
#include "stream.h"
#ifdef _WIN32
#include <fcntl.h>
#include <io.h>
#endif
/**
* Close file stream
*
* @param stream stream
*/
static void lzsa_filestream_close(lzsa_stream_t *stream) {
static void lzsa_filestream_close(struct _lzsa_stream_t *stream) {
if (stream->obj) {
fclose((FILE*)stream->obj);
stream->obj = NULL;
@ -60,7 +64,7 @@ static void lzsa_filestream_close(lzsa_stream_t *stream) {
*
* @return number of bytes read
*/
static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
static size_t lzsa_filestream_read(struct _lzsa_stream_t *stream, void *ptr, size_t size) {
return fread(ptr, 1, size, (FILE*)stream->obj);
}
@ -73,7 +77,7 @@ static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size
*
* @return number of bytes written
*/
static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
static size_t lzsa_filestream_write(struct _lzsa_stream_t *stream, void *ptr, size_t size) {
return fwrite(ptr, 1, size, (FILE*)stream->obj);
}
@ -84,7 +88,7 @@ static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t siz
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
static int lzsa_filestream_eof(lzsa_stream_t *stream) {
static int lzsa_filestream_eof(struct _lzsa_stream_t *stream) {
return feof((FILE*)stream->obj);
}
@ -98,7 +102,32 @@ static int lzsa_filestream_eof(lzsa_stream_t *stream) {
* @return 0 for success, nonzero for failure
*/
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
stream->obj = (void*)fopen(pszInFilename, pszMode);
const char* stdInOutFile = "-";
const char* stdInMode = "rb";
const char* stdOutMode = "wb";
#ifdef _WIN32
int result;
#endif
if (!strncmp(pszInFilename, stdInOutFile, 1)) {
if (!strncmp(pszMode, stdInMode, 2)) {
#ifdef _WIN32
result = _setmode(_fileno(stdin), _O_BINARY);
#endif
stream->obj = stdin;
} else if (!strncmp(pszMode, stdOutMode, 2)) {
#ifdef _WIN32
result = _setmode(_fileno(stdout), _O_BINARY);
#endif
stream->obj = stdout;
} else {
return -1;
}
} else {
stream->obj = (void*)fopen(pszInFilename, pszMode);
}
if (stream->obj) {
stream->read = lzsa_filestream_read;
stream->write = lzsa_filestream_write;

View File

@ -37,8 +37,23 @@
extern "C" {
#endif
/* Forward declaration */
typedef struct _lzsa_stream_t lzsa_stream_t;
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
   LZSA_OK = 0,                     /**< Success */
   LZSA_ERROR_SRC,                  /**< Error reading input */
   LZSA_ERROR_DST,                  /**< Error writing output */
   LZSA_ERROR_DICTIONARY,           /**< Error reading dictionary */
   LZSA_ERROR_MEMORY,               /**< Out of memory */
   /* Compression-specific status codes */
   LZSA_ERROR_COMPRESSION,          /**< Internal compression error */
   LZSA_ERROR_RAW_TOOLARGE,         /**< Input is too large to be compressed to a raw block */
   LZSA_ERROR_RAW_UNCOMPRESSED,     /**< Input is incompressible and raw blocks don't support uncompressed data */
   /* Decompression-specific status codes */
   LZSA_ERROR_FORMAT,               /**< Invalid input format or magic number when decompressing */
   LZSA_ERROR_DECOMPRESSION         /**< Internal decompression error */
   /* NOTE(review): this enum appears in the public header, so the numeric
    * values are presumably API-stable — append new codes, don't reorder. */
} lzsa_status_t;
/* I/O stream */
typedef struct _lzsa_stream_t {
@ -54,7 +69,7 @@ typedef struct _lzsa_stream_t {
*
* @return number of bytes read
*/
size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);
size_t(*read)(struct _lzsa_stream_t *stream, void *ptr, size_t size);
/**
* Write to stream
@ -65,7 +80,7 @@ typedef struct _lzsa_stream_t {
*
* @return number of bytes written
*/
size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);
size_t(*write)(struct _lzsa_stream_t *stream, void *ptr, size_t size);
/**
@ -75,14 +90,14 @@ typedef struct _lzsa_stream_t {
*
* @return nonzero if the end of the data has been reached, 0 if there is more data
*/
int(*eof)(lzsa_stream_t *stream);
int(*eof)(struct _lzsa_stream_t *stream);
/**
* Close stream
*
* @param stream stream
*/
void(*close)(lzsa_stream_t *stream);
void(*close)(struct _lzsa_stream_t *stream);
} lzsa_stream_t;
/**