39 Commits
1.0.5 ... 1.0.9

Author SHA1 Message Date
1a4f662360 Bump version 2019-09-20 12:26:16 +02:00
c12e20b7fb Improve LZSA2 compression ratio 2019-09-20 12:24:27 +02:00
51644ad2f9 Speed LZSA2 compression up further; fix typo 2019-09-19 17:18:37 +02:00
1495b27f69 Speed up LZSA1 compression with forward arrivals 2019-09-19 12:57:39 +02:00
c052a188f2 Reduce LZSA2 forward arrivals memory use 2019-09-19 11:46:03 +02:00
e4076e4090 Speed LZSA2 compression up; tiny ratio increase 2019-09-19 00:11:26 +02:00
8b7d0ab04d Increase LZSA2 ratio. Decrease token count 2019-09-17 08:10:52 +02:00
b1da9c1aee Add extra bound checks in C decompressors 2019-09-12 16:19:14 +02:00
b92a003338 Merge pull request #29 from francois-berder/master
Various improvements -- thank you!
2019-08-28 13:50:00 +02:00
4f2d7da136 Fix main return value if compressing
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:41:54 +01:00
a318ac2f83 Fix memory leak in comparestream_open
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:40:49 +01:00
da67938978 Set dictionary to NULL in lzsa_dictionary_free
Signed-off-by: Francois Berder <18538310+francois-berder@users.noreply.github.com>
2019-08-28 09:39:07 +01:00
2d213bcff1 Bump version number 2019-08-27 13:18:23 +02:00
9de7e930e9 Faster LZSA1 z80 decompression 2019-08-27 13:16:20 +02:00
ef259e6867 Implement forward arrivals optimal parsers 2019-08-27 00:51:34 +02:00
90b4da64d1 Merge pull request #27 from uniabis/twobytesshorter
2bytes shorter
2019-08-26 23:49:27 +02:00
a807344343 2bytes shorter 2019-08-22 12:55:55 +09:00
27d0fe4e83 Merge pull request #26 from arm-in/patch-1
Update README.md
2019-08-06 20:54:24 +02:00
f8e445a98a Update README.md
Now 67 bytes with commit be30cae636
2019-08-06 20:15:59 +02:00
0e567bde47 Merge pull request #25 from specke/master
-1 byte
2019-08-06 20:03:52 +02:00
be30cae636 -1 byte
slightly slower, but this is the size-optimized branch
2019-08-06 12:36:27 +01:00
1b368e71ad Fix comments, header single inclusion defines 2019-08-04 16:42:30 +02:00
d98220ff42 Merge pull request #24 from specke/master
New Pareto frontier graph
2019-08-01 16:51:29 +02:00
d412433df4 New Pareto frontier graph
Shows improved performance of the new Z80 decompressors, esp. due to the improvements by uniabis
2019-08-01 15:26:53 +01:00
77c1492310 Merge pull request #23 from specke/master
New faster and shorter decompressors
2019-08-01 16:19:24 +02:00
44bff39de3 New faster and shorter decompressors
This update is mostly about better integration of improvements by uniabis, with spke contributing several smaller size optimizations.
2019-08-01 15:07:14 +01:00
3c690b04f5 Merge pull request #22 from specke/master
incorporated improvements by uniabis
2019-08-01 01:34:46 +02:00
e7bb1faece Merge branch 'master' into master 2019-07-31 23:24:30 +01:00
e48d2dafde Merge pull request #21 from uniabis/hd64180 - up to 3% speedup on Z80!
hd64180 support on z80 unpacker
2019-07-31 23:57:23 +02:00
51ef92cdab incorporated improvements by uniabis
also, slightly faster decompression for fast packer in backwards mode
2019-07-31 20:42:47 +01:00
8d0528fddc hd64180 support
a bit faster, a bit smaller
2019-07-31 01:39:27 +09:00
b3aae36ecc Bump version 2019-07-28 00:25:51 +02:00
8787b1c3d8 Merge pull request #20 from specke/master (should be already fixed now..)
fix a bug in the backward version of unlzsa2_fast_v1.asm
2019-07-27 15:50:38 +02:00
0a04796b19 Fix for z80 LZSA2 fast backward depacker 2019-07-27 15:39:44 +02:00
ac3bf78273 fix a bug in the backward version of unlzsa2_fast_v1.asm
an INC HL slipped through
2019-07-27 14:14:54 +01:00
82edcb8bb5 Fix literal runs that are multiple of 256 bytes 2019-07-27 01:35:46 +02:00
b613d01565 Test incompressible data with raw blocks 2019-07-26 13:30:41 +02:00
ae4cc12aed Use ACME syntax 2019-07-26 12:31:26 +02:00
316dfdcdce Fix comments, remove unused vars 2019-07-26 01:12:17 +02:00
24 changed files with 912 additions and 798 deletions

View File

@ -29,7 +29,7 @@ Performance over well-known compression corpus files:
Large 11159482 3771025 (33,79%) 3648420 (32,69%) 3528725 (31,62%)
enwik9 1000000000 371841591 (37,18%) 355360717 (35,54%) 337063553 (33,71%)
As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 69 bytes.
As an example of LZSA1's simplicity, a size-optimized decompressor on Z80 has been implemented in 67 bytes.
The compressor is approximately 2X slower than LZ4_HC but compresses better while maintaining similar decompression speeds and decompressor simplicity.

View File

@ -49,10 +49,10 @@ DECODE_TOKEN
AND #$70 ; isolate literals count
BEQ NO_LITERALS ; skip if no literals to copy
LSR A ; shift literals count into place
LSR A
LSR A
LSR A
LSR ; shift literals count into place
LSR
LSR
LSR
CMP #$07 ; LITERALS_RUN_LEN?
BCC PREPARE_COPY_LITERALS ; if not, count is directly embedded in token
@ -71,7 +71,8 @@ LARGE_VARLEN_LITERALS ; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
BYTE $A9 ; mask TAX (faster than BCS)
TXA
PREPARE_COPY_LITERALS
TAX
BEQ COPY_LITERALS
@ -91,7 +92,7 @@ NO_LITERALS
JSR GETSRC ; get 8 bit offset from stream in A
TAX ; save for later
LDA #$0FF ; high 8 bits
LDA #$FF ; high 8 bits
BNE GOT_OFFSET ; go prepare match
; (*like JMP GOT_OFFSET but shorter)
@ -110,7 +111,7 @@ COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
@ -120,7 +121,7 @@ ifdef BACKWARD_DECOMPRESS
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
else
} else {
; Forward decompression -- put backreference bytes forward
@ -129,7 +130,7 @@ else
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
endif
}
DEX
BNE COPY_MATCH_LOOP
@ -142,7 +143,7 @@ GET_LONG_OFFSET ; handle 16 bit offset:
GOT_OFFSET
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - substract match offset
@ -160,7 +161,7 @@ OFFSHI = *+1
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
else
} else {
; Forward decompression - add match offset
@ -176,7 +177,7 @@ OFFSHI = *+1
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
endif
}
PLA ; retrieve token from stack again
AND #$0F ; isolate match len (MMMM)
@ -200,7 +201,7 @@ endif
DECOMPRESSION_DONE
RTS
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
@ -235,7 +236,7 @@ GETSRC_DONE
PLA
RTS
else
} else {
; Forward decompression -- get and put bytes forward
@ -266,4 +267,4 @@ LZSA_SRC_HI = *+2
GETSRC_DONE
RTS
endif
}

View File

@ -53,9 +53,9 @@ DECODE_TOKEN
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
LSR A ; shift literals count into place
LSR A
LSR A
LSR ; shift literals count into place
LSR
LSR
CMP #$03 ; LITERALS_RUN_LEN_V2?
BCC PREPARE_COPY_LITERALS ; if less, count is directly embedded in token
@ -102,7 +102,7 @@ NO_LITERALS
; 00Z: 5 bit offset
LDX #$0FF ; set offset bits 15-8 to 1
LDX #$FF ; set offset bits 15-8 to 1
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
@ -142,7 +142,7 @@ GOT_OFFSET_LO
STX OFFSHI ; store high byte of match offset
REP_MATCH
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression - substract match offset
@ -157,7 +157,7 @@ OFFSHI = *+1
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
SEC
else
} else {
; Forward decompression - add match offset
@ -171,7 +171,7 @@ OFFSHI = *+1
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
endif
}
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
@ -208,7 +208,7 @@ COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
JSR PUTDST ; copy to destination
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- put backreference bytes backward
@ -218,7 +218,7 @@ ifdef BACKWARD_DECOMPRESS
GETMATCH_DONE
DEC COPY_MATCH_LOOP+1
else
} else {
; Forward decompression -- put backreference bytes forward
@ -227,7 +227,7 @@ else
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
endif
}
DEX
BNE COPY_MATCH_LOOP
@ -266,7 +266,7 @@ HAS_NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
ifdef BACKWARD_DECOMPRESS
!ifdef BACKWARD_DECOMPRESS {
; Backward decompression -- get and put bytes backward
@ -301,7 +301,7 @@ GETSRC_DONE
PLA
RTS
else
} else {
; Forward decompression -- get and put bytes forward
@ -332,4 +332,5 @@ LZSA_SRC_HI = *+2
GETSRC_DONE
RTS
endif
}

View File

@ -1,5 +1,6 @@
;
; Speed-optimized LZSA decompressor by spke (v.1 03-25/04/2019, 110 bytes)
; Speed-optimized LZSA1 decompressor by spke (v.2 03/04/2019-27/08/2019; 111 bytes);
; with improvements by uniabis (30/07/2019,22/08/2019; -3 bytes, +3% speed).
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -12,7 +13,7 @@
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA
; call DecompressLZSA1
;
; Backward compression is also supported; you can compress files backward using:
;
@ -22,11 +23,11 @@
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithm is (c) 2019 Emmanuel Marty,
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
@ -49,50 +50,59 @@
; DEFINE BACKWARD_DECOMPRESS
IFDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
or a : sbc hl,de
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ELSE
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
add hl,de
ex de,hl : add hl,de
ENDM
MACRO COPY_MATCH
ldi : ldi : ldir
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM
MACRO COPY_MATCH
ldd : ldd : lddr
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
@DecompressLZSA:
@DecompressLZSA1:
ld b,0 : jr ReadToken
NoLiterals: xor (hl) : NEXT_HL
push de : ld e,(hl) : NEXT_HL : jp m,LongOffset
NoLiterals: xor (hl)
push de : NEXT_HL : ld e,(hl) : jp m,LongOffset
; short matches have length 0+3..14+3
ShortOffset: ld d,#FF : add 3 : cp 15+3 : jr nc,LongerMatch
; placed here this saves a JP per iteration
CopyMatch: ld c,a
.UseC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
BLOCKCOPY : pop hl ; BC = 0, DE = dest, HL = src
.UseC NEXT_HL : ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
COPY_MATCH : pop hl ; BC = 0, DE = dest, HL = src
ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
@ -102,26 +112,26 @@ ReadToken: ; first a byte token "O|LLL|MMMM" is read from the stream,
cp #70 : jr z,MoreLiterals ; LLL=7 means 7+ literals...
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
ld c,a : ld a,(hl) : NEXT_HL
BLOCKCOPY
ld c,a : ld a,(hl)
NEXT_HL : BLOCKCOPY
; next we read the first byte of the offset
push de : ld e,(hl) : NEXT_HL
push de : ld e,(hl)
; the top bit of token is set if the offset contains two bytes
and #8F : jp p,ShortOffset
LongOffset: ; read second byte of the offset
ld d,(hl) : NEXT_HL
NEXT_HL : ld d,(hl)
add -128+3 : cp 15+3 : jp c,CopyMatch
; MMMM=15 indicates a multi-byte number of literals
LongerMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
LongerMatch: NEXT_HL : add (hl) : jr nc,CopyMatch
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1 ld b,a : ld c,(hl) : NEXT_HL : jr nz,CopyMatch.UseC
.code0 ld b,(hl) : NEXT_HL
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyMatch.UseC
.code0 NEXT_HL : ld b,(hl)
; the two-byte match length equal to zero
; designates the end-of-data marker
@ -129,17 +139,17 @@ LongerMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
pop de : ret
MoreLiterals: ; there are three possible situations here
xor (hl) : NEXT_HL : exa
ld a,7 : add (hl) : NEXT_HL : jr c,ManyLiterals
xor (hl) : exa
ld a,7 : NEXT_HL : add (hl) : jr c,ManyLiterals
CopyLiterals: ld c,a
.UseC BLOCKCOPY
.UseC NEXT_HL : BLOCKCOPY
push de : ld e,(hl) : NEXT_HL
push de : ld e,(hl)
exa : jp p,ShortOffset : jr LongOffset
ManyLiterals:
.code1 ld b,a : ld c,(hl) : NEXT_HL : jr nz,CopyLiterals.UseC
.code0 ld b,(hl) : NEXT_HL : jr CopyLiterals.UseC
.code1 ld b,a : NEXT_HL : ld c,(hl) : jr nz,CopyLiterals.UseC
.code0 NEXT_HL : ld b,(hl) : jr CopyLiterals.UseC

View File

@ -1,128 +1,129 @@
;
; Size-optimized LZSA decompressor by spke (v.1 23/04/2019, 69 bytes)
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithm is (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE BACKWARD_DECOMPRESS
IFDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
or a : sbc hl,de
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ELSE
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ENDIF
@DecompressLZSA:
ld b,0
; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ReadToken: ld a,(hl) : exa : ld a,(hl) : NEXT_HL
and #70 : jr z,NoLiterals
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
cp #07 : call z,ReadLongBA ; LLL=7 means 7+ literals...
ld c,a : BLOCKCOPY
; next we read the low byte of the -offset
NoLiterals: push de : ld e,(hl) : NEXT_HL : ld d,#FF
; the top bit of token is set if
; the offset contains the high byte as well
exa : or a : jp p,ShortOffset
LongOffset: ld d,(hl) : NEXT_HL
; last but not least, the match length is read
ShortOffset: and #0F : add 3 ; MMMM<15 means match lengths 0+3..14+3
cp 15+3 : call z,ReadLongBA ; MMMM=15 means lengths 14+3+
ld c,a
ex (sp),hl : push hl ; BC = len, DE = -offset, HL = dest, SP ->[dest,src]
ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest+(-offset), SP->[src]
BLOCKCOPY : pop hl ; BC = 0, DE = dest, HL = src
jr ReadToken
; a standard routine to read extended codes
; into registers B (higher byte) and A (lower byte).
ReadLongBA: add (hl) : NEXT_HL : ret nc
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1: ld b,a : ld a,(hl) : NEXT_HL : ret nz
.code0: ld c,a : ld b,(hl) : NEXT_HL
; the two-byte match length equal to zero
; designates the end-of-data marker
or b : ld a,c : ret nz
pop de : pop de : ret
;
; Size-optimized LZSA1 decompressor by spke (v.1 23/04/2019, 68 bytes);
; with improvements by uniabis (30/07/2019, -1 byte, +3% speed).
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
;
; lzsa.exe -f1 -r <sourcefile> <outfile>
;
; where option -r asks for the generation of raw (frame-less) data.
;
; The decompression is done in the standard way:
;
; ld hl,FirstByteOfCompressedData
; ld de,FirstByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; Backward compression is also supported; you can compress files backward using:
;
; lzsa.exe -f1 -r -b <sourcefile> <outfile>
;
; and decompress the resulting files using:
;
; ld hl,LastByteOfCompressedData
; ld de,LastByteOfMemoryForDecompressedData
; call DecompressLZSA1
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE BACKWARD_DECOMPRESS
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
@DecompressLZSA1:
ld b,0
; first a byte token "O|LLL|MMMM" is read from the stream,
; where LLL is the number of literals and MMMM is
; a length of the match that follows after the literals
ReadToken: ld a,(hl) : exa : ld a,(hl) : NEXT_HL
and #70 : jr z,NoLiterals
rrca : rrca : rrca : rrca ; LLL<7 means 0..6 literals...
cp #07 : call z,ReadLongBA ; LLL=7 means 7+ literals...
ld c,a : BLOCKCOPY
; next we read the low byte of the -offset
NoLiterals: push de : ld e,(hl) : NEXT_HL : ld d,#FF
; the top bit of token is set if
; the offset contains the high byte as well
exa : or a : jp p,ShortOffset
LongOffset: ld d,(hl) : NEXT_HL
; last but not least, the match length is read
ShortOffset: and #0F : add 3 ; MMMM<15 means match lengths 0+3..14+3
cp 15+3 : call z,ReadLongBA ; MMMM=15 means lengths 14+3+
ld c,a
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl : jr ReadToken ; HL = src
; a standard routine to read extended codes
; into registers B (higher byte) and A (lower byte).
ReadLongBA: add (hl) : NEXT_HL : ret nc
; the codes are designed to overflow;
; the overflow value 1 means read 1 extra byte
; and overflow value 0 means read 2 extra bytes
.code1: ld b,a : ld a,(hl) : NEXT_HL : ret nz
.code0: ld c,a : ld b,(hl) : NEXT_HL
; the two-byte match length equal to zero
; designates the end-of-data marker
or b : ld a,c : ret nz
pop de : pop de : ret

View File

@ -1,5 +1,6 @@
;
; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019, 218 bytes)
; Speed-optimized LZSA2 decompressor by spke (v.1 02-07/06/2019, 214 bytes);
; with improvements by uniabis (30/07/2019, -5 bytes, +3% speed and support for Hitachi HD64180).
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -26,7 +27,7 @@
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty,
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
@ -47,77 +48,80 @@
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; DEFINE BACKWARD_DECOMPRESS
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
IFDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
or a : sbc hl,de
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ELSE
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
add hl,de
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
ex de,hl : ld a,e : sub l : ld l,a
ld a,d : sbc h : ld h,a ; 4*4+3*4 = 28t / 7 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
IFNDEF HD64180
MACRO LD_IX_DE
ld ixl,e : ld ixh,d
ENDM
MACRO LD_DE_IX
ld e,ixl : ld d,ixh
ENDM
ELSE
MACRO LD_IX_DE
push de : pop ix
ENDM
MACRO LD_DE_IX
push ix : pop de
ENDM
ENDIF
@DecompressLZSA2:
; A' stores next nibble as %1111.... or assumed to contain trash
; B is assumed to be 0
xor a : ld b,a : exa : jr ReadToken
ld b,0 : scf : exa : jr ReadToken
LongerMatch: exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
;inc a : jr z,LongMatch : sub #F0-9+1 : jp CopyMatch
LongMatch: ;ld a,24 :
add (hl) : NEXT_HL : jr nc,CopyMatch
ManyLiterals: ld a,18 : add (hl) : NEXT_HL : jr nc,CopyLiterals
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL
jr nz,CopyMatch.useC
pop de : ret
ManyLiterals: ld a,18 :
add (hl) : NEXT_HL : jr nc,CopyLiterals
ld c,(hl) : NEXT_HL
ld a,b : ld b,(hl) : inc hl
jr CopyLiterals.useBC
ld a,b : ld b,(hl)
jr ReadToken.NEXTHLuseBC
MoreLiterals: ld b,(hl) : NEXT_HL
exa : jp m,.noUpdate
scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
@ -126,18 +130,25 @@ MoreLiterals: ld b,(hl) : NEXT_HL
.noUpdate ;sub #F0-3 : cp 15+3 : jr z,ManyLiterals
inc a : jr z,ManyLiterals : sub #F0-3+1
CopyLiterals: ld c,a
.useC ld a,b : ld b,0
.useBC BLOCKCOPY
push de : or a : jp p,CASE0xx : jr CASE1xx
CopyLiterals: ld c,a : ld a,b : ld b,0
BLOCKCOPY
push de : or a : jp p,CASE0xx ;: jr CASE1xx
cp %11000000 : jr c,CASE10x
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: LD_DE_IX : jr MatchLen
Literals0011: jr nz,MoreLiterals
; if "LL" of the byte token is equal to 0,
; there are no literals to copy
NoLiterals: xor (hl) : NEXT_HL
NoLiterals: or (hl) : NEXT_HL
push de : jp m,CASE1xx
; short (5 or 9 bit long) offsets
@ -146,25 +157,25 @@ CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
; "01x": the case of the 9-bit offset
CASE01x: cp %01100000 : rl d
ReadOffsetE: ld e,(hl) : NEXT_HL
ReadOffsetE ld e,(hl) : NEXT_HL
SaveOffset: ld ixl,e : ld ixh,d
SaveOffset: LD_IX_DE
MatchLen: inc a : and %00000111 : jr z,LongerMatch : inc a
CopyMatch: ld c,a
.useC ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
.useC ex (sp),hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest-offset, SP->[src]
BLOCKCOPY : pop hl
; compressed data stream contains records
; each record begins with the byte token "XYZ|LL|MMM"
ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals
ReadToken: ld a,(hl) : and %00011000 : jp pe,Literals0011 ; process the cases 00 and 11 separately
jp pe,MoreLiterals ; 00 has already been processed; this identifies the case of 11
rrca : rrca : rrca
ld c,a : ld a,(hl) : NEXT_HL ; token is re-read for further processing
ld c,a : ld a,(hl) ; token is re-read for further processing
.NEXTHLuseBC NEXT_HL
BLOCKCOPY
; the token and literals are followed by the offset
@ -173,8 +184,7 @@ ReadToken: ld a,(hl) : and %00011000 : jr z,NoLiterals
CASE1xx cp %11000000 : jr nc,CASE11x
; "10x": the case of the 5-bit offset
CASE10x: ld c,a : xor a
exa : jp m,.noUpdate
CASE10x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
@ -184,29 +194,42 @@ CASE10x: ld c,a : xor a
cp %10100000 : rl d
dec d : dec d : jr ReadOffsetE
; "00x": the case of the 5-bit offset
CASE00x: ld c,a : xor a
exa : jp m,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jp SaveOffset
; two remaining cases
CASE11x cp %11100000 : jr c,CASE110
; "111": repeated offset
CASE111: ld e,ixl : ld d,ixh : jr MatchLen
; "110": 16-bit offset
CASE110: ld d,(hl) : NEXT_HL : jr ReadOffsetE
; "00x": the case of the 5-bit offset
CASE00x: ld c,a : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
LongerMatch: scf : exa : jr nc,.noUpdate
ld a,(hl) : or #F0 : exa
ld a,(hl) : NEXT_HL : or #0F
rrca : rrca : rrca : rrca
.noUpdate sub #F0-9 : cp 15+9 : jr c,CopyMatch
LongMatch: add (hl) : NEXT_HL : jr nc,CopyMatch
ld c,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : jr nz,CopyMatch.useC
pop de : ret

View File

@ -1,5 +1,6 @@
;
; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019, 145 bytes)
; Size-optimized LZSA2 decompressor by spke (v.1 02-09/06/2019, 140 bytes);
; with improvements by uniabis (30/07/2019, -1 byte, +3% speed and support for Hitachi HD64180).
;
; The data must be compressed using the command line compressor by Emmanuel Marty
; The compression is done as follows:
@ -26,7 +27,7 @@
;
; (do not forget to uncomment the BACKWARD_DECOMPRESS option in the decompressor).
;
; Of course, LZSA2 compression algorithm is (c) 2019 Emmanuel Marty,
; Of course, LZSA2 compression algorithms are (c) 2019 Emmanuel Marty,
; see https://github.com/emmanuel-marty/lzsa for more information
;
; Drop me an email if you have any comments/ideas/suggestions: zxintrospec@gmail.com
@ -48,57 +49,85 @@
; 3. This notice may not be removed or altered from any source distribution.
;
; DEFINE BACKWARD_DECOMPRESS
; DEFINE BACKWARD_DECOMPRESS ; uncomment for data compressed with option -b
; DEFINE HD64180 ; uncomment for systems using Hitachi HD64180
IFDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
or a : sbc hl,de
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ELSE
IFNDEF BACKWARD_DECOMPRESS
MACRO NEXT_HL
inc hl
ENDM
MACRO ADD_OFFSET
add hl,de
ex de,hl : add hl,de
ENDM
MACRO BLOCKCOPY
ldir
ENDM
ELSE
MACRO NEXT_HL
dec hl
ENDM
MACRO ADD_OFFSET
push hl : or a : sbc hl,de : pop de ; 11+4+15+10 = 40t / 5 bytes
ENDM
MACRO BLOCKCOPY
lddr
ENDM
ENDIF
IFNDEF HD64180
MACRO LD_IY_DE
ld iyl,e : ld iyh,d
ENDM
MACRO LD_DE_IY
ld e,iyl : ld d,iyh
ENDM
ELSE
MACRO LD_IY_DE
push de : pop iy
ENDM
MACRO LD_DE_IY
push iy : pop de
ENDM
ENDIF
@DecompressLZSA2:
xor a : ld b,a : exa : jr ReadToken
CASE00x: call ReadNibble
ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
CASE0xx ld d,#FF : cp %01000000 : jr c,CASE00x
CASE01x: cp %01100000 : rl d
OffsetReadE: ld e,(hl) : NEXT_HL
SaveOffset: ld iyl,e : ld iyh,d
SaveOffset: LD_IY_DE
MatchLen: and %00000111 : add 2 : cp 9 : call z,ExtendedCode
CopyMatch: ld c,a
ex (sp),hl : push hl ; BC = len, DE = offset, HL = dest, SP ->[dest,src]
ADD_OFFSET : pop de ; BC = len, DE = dest, HL = dest-offset, SP->[src]
BLOCKCOPY : pop hl
ex (sp),hl ; BC = len, DE = -offset, HL = dest, SP -> [src]
ADD_OFFSET ; BC = len, DE = dest, HL = dest+(-offset), SP -> [src]
BLOCKCOPY ; BC = 0, DE = dest
pop hl ; HL = src
ReadToken: ld a,(hl) : ld ixl,a : NEXT_HL
ReadToken: ld a,(hl) : NEXT_HL : push af
and %00011000 : jr z,NoLiterals
rrca : rrca : rrca
@ -107,7 +136,7 @@ ReadToken: ld a,(hl) : ld ixl,a : NEXT_HL
ld c,a
BLOCKCOPY
NoLiterals: push de : ld a,ixl
NoLiterals: pop af : push de
or a : jp p,CASE0xx
CASE1xx cp %11000000 : jr nc,CASE11x
@ -115,24 +144,20 @@ CASE1xx cp %11000000 : jr nc,CASE11x
CASE10x: call ReadNibble
ld d,a : ld a,c
cp %10100000 : rl d
dec d : dec d : jr OffsetReadE
dec d : dec d : DB #CA ; jr OffsetReadE ; #CA is JP Z,.. to skip all commands in CASE110 before jr OffsetReadE
CASE00x: call ReadNibble
ld e,a : ld a,c
cp %00100000 : rl e : jr SaveOffset
CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE
CASE11x cp %11100000 : jr c,CASE110
CASE111: ld e,iyl : ld d,iyh : jr MatchLen
CASE110: ld d,(hl) : NEXT_HL : jr OffsetReadE
CASE111: LD_DE_IY : jr MatchLen
ExtendedCode: call ReadNibble : inc a : jr z,ExtraByte
sub #F0+1 : add c : ret
ExtraByte ld a,15 : add c : add (hl) : NEXT_HL : ret nc
ld a,(hl) : NEXT_HL
ld b,(hl) : NEXT_HL : ret nz
pop de : pop de : ret
pop de : pop de ; RET is not needed, because RET from ReadNibble is sufficient
ReadNibble: ld c,a : xor a : exa : ret m
UpdateNibble ld a,(hl) : or #F0 : exa

Binary file not shown.

Before

Width:  |  Height:  |  Size: 34 KiB

After

Width:  |  Height:  |  Size: 46 KiB

View File

@ -96,6 +96,6 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
void lzsa_dictionary_free(void **ppDictionaryData) {
if (*ppDictionaryData) {
free(*ppDictionaryData);
ppDictionaryData = NULL;
*ppDictionaryData = NULL;
}
}

View File

@ -1,5 +1,5 @@
/*
* expand_v1.c - LZSA1 block decompressor implementation
* expand_block_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -166,7 +166,7 @@ int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockS
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 18) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 2);
@ -181,27 +181,32 @@ int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockS
break;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
return -1;
}
}
else {

View File

@ -1,5 +1,5 @@
/*
* expand_v1.h - LZSA1 block decompressor definitions
* expand_block_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -30,8 +30,8 @@
*
*/
#ifndef _EXPAND_V1_H
#define _EXPAND_V1_H
#ifndef _EXPAND_BLOCK_V1_H
#define _EXPAND_BLOCK_V1_H
/**
* Decompress one LZSA1 data block
@ -46,4 +46,4 @@
*/
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V1_H */
#endif /* _EXPAND_BLOCK_V1_H */

View File

@ -1,5 +1,5 @@
/*
* expand_v2.c - LZSA2 block decompressor implementation
* expand_block_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -195,7 +195,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
const unsigned char *pSrc = pCurOutData - nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x07);
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd && (pSrc + 10) <= pOutDataEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 2);
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
@ -209,27 +209,32 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
break;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if ((pSrc + nMatchLen) <= pOutDataEnd) {
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
return -1;
}
}
else {

View File

@ -1,5 +1,5 @@
/*
* expand_v2.h - LZSA2 block decompressor definitions
* expand_block_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -30,8 +30,8 @@
*
*/
#ifndef _EXPAND_V2_H
#define _EXPAND_V2_H
#ifndef _EXPAND_BLOCK_V2_H
#define _EXPAND_BLOCK_V2_H
/**
* Decompress one LZSA2 data block
@ -46,4 +46,4 @@
*/
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V2_H */
#endif /* _EXPAND_BLOCK_V2_H */

View File

@ -47,7 +47,7 @@
#define OPT_FAVOR_RATIO 4
#define OPT_RAW_BACKWARD 8
#define TOOL_VERSION "1.0.5"
#define TOOL_VERSION "1.0.9"
/*---------------------------------------------------------------------------*/
@ -277,8 +277,10 @@ int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, co
stream->close = comparestream_close;
return 0;
}
else
else {
free(pCompareStream);
return -1;
}
}
static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
@ -486,7 +488,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
float fMatchProbability;
fprintf(stdout, "size %zd", nGeneratedDataSize);
for (fMatchProbability = ((nOptions & OPT_RAW) ? 0.5f : 0); fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
float fXorProbability;
@ -1052,7 +1054,9 @@ int main(int argc, char **argv) {
if (cCommand == 'z') {
int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
if (nResult == 0 && bVerifyCompression) {
nResult = do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
} else {
return nResult;
}
}
else if (cCommand == 'd') {

View File

@ -35,6 +35,17 @@
#include "format.h"
#include "lib.h"
/**
* Hash index into TAG_BITS
*
* @param nIndex index value
*
* @return hash
*/
static inline int lzsa_get_index_tag(unsigned int nIndex) {
   /* Multiplicative (Fibonacci) hash: multiply by 2^64/phi and keep the top TAG_BITS bits */
   const unsigned long long nGoldenRatioFactor = 11400714819323198485ULL;
   unsigned long long nProduct = (unsigned long long)nIndex * nGoldenRatioFactor;

   return (int)(nProduct >> (64ULL - TAG_BITS));
}
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
@ -78,15 +89,31 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
int nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
}
}
else {
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_AND_TAG_MAX)
nLen = LCP_AND_TAG_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
}
if (i < nInWindowSize)
intervals[i] &= POS_MASK;
@ -219,7 +246,12 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
}
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}

View File

@ -1,5 +1,5 @@
/*
* shrink_v1.c - LZSA1 block compressor implementation
* shrink_block_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -139,7 +139,130 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
* Get offset encoding cost in bits
*
* @param nMatchOffset offset to get cost of
*
* @return cost in bits
*/
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
   /* Short offsets (up to 256) are encoded in one byte, longer ones in two */
   if (nMatchOffset <= 256)
      return 8;

   return 16;
}
/**
 * Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   lzsa_arrival *arrival = pCompressor->arrival;
   int nMinMatchSize = pCompressor->min_match_size;
   int i, j, n;

   /* Clear all arrival slots for the range to be parsed; slot 0 at the start position acts as the parse root */
   memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));

   arrival[nStartOffset << MATCHES_PER_OFFSET_SHIFT].from_slot = -1;

   for (i = nStartOffset; i != (nEndOffset - 1); i++) {
      const lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
      int m;

      /* For every live arrival at this position, consider emitting one literal byte (8 bits) */
      for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
         int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
         int nCodingChoiceCost = nPrevCost + 8 /* literal */;
         int nNumLiterals = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals + 1;

         /* Crossing a literals run-length encoding boundary costs an extra byte */
         if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
            nCodingChoiceCost += 8;
         }

         lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT)];

         /* Insert as the best (slot 0) arrival at the next position when the slot is free or this choice is at least as cheap,
          * shifting the remaining slots down to keep them sorted by cost */
         if (pDestArrival->from_slot == 0 ||
            nCodingChoiceCost <= pDestArrival->cost) {
            memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + 1],
               &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT)],
               sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - 1));

            pDestArrival->cost = nCodingChoiceCost;
            pDestArrival->from_pos = i;
            pDestArrival->from_slot = j + 1;
            pDestArrival->match_offset = 0;
            pDestArrival->match_len = 0;
            pDestArrival->num_literals = nNumLiterals;
            pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
         }
      }

      /* Now consider every match found at this position, at every encodable length */
      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
         int nMatchLen = pMatch[m].length;
         int nMatchOffsetCost = lzsa_get_offset_cost_v1(pMatch[m].offset);
         int nStartingMatchLen, k;

         /* Clamp the match so it doesn't run into the literals reserved at the end of the block */
         if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
            nMatchLen = nEndOffset - LAST_LITERALS - i;

         /* Very long matches are only considered at full length to bound the work done here */
         if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
            nStartingMatchLen = nMatchLen;
         else
            nStartingMatchLen = nMinMatchSize;

         for (k = nStartingMatchLen; k <= nMatchLen; k++) {
            int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);

            for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
               int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
               int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
               int exists = 0;

               /* Skip this choice if a cheaper-or-equal arrival with the same offset cost class already landed at i + k */
               for (n = 0;
                  n < 3 && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
                  n++) {
                  if (lzsa_get_offset_cost_v1(arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset) == lzsa_get_offset_cost_v1(pMatch[m].offset)) {
                     exists = 1;
                     break;
                  }
               }

               for (n = 0; !exists && n < 3 /* we only need the literals + short match cost + long match cost cases */; n++) {
                  lzsa_arrival *pDestArrival = &arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n];

                  /* Insert in cost order at i + k, shifting the costlier arrivals down */
                  if (pDestArrival->from_slot == 0 ||
                     nCodingChoiceCost <= pDestArrival->cost) {
                     memmove(&arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n + 1],
                        &arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n],
                        sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));

                     pDestArrival->cost = nCodingChoiceCost;
                     pDestArrival->from_pos = i;
                     pDestArrival->from_slot = j + 1;
                     pDestArrival->match_offset = pMatch[m].offset;
                     pDestArrival->match_len = k;
                     pDestArrival->num_literals = 0;
                     pDestArrival->rep_offset = pMatch[m].offset;
                     break;
                  }
               }
            }
         }
      }
   }

   /* Walk the cheapest arrival chain backwards from the end of the block, writing the chosen matches
    * into slot 0 of pCompressor->match for the emitter to pick up */
   lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_OFFSET_SHIFT) + 0];
   pCompressor->match[i << MATCHES_PER_OFFSET_SHIFT].length = 0;
   pCompressor->match[i << MATCHES_PER_OFFSET_SHIFT].offset = 0;

   while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
      pCompressor->match[end_arrival->from_pos << MATCHES_PER_OFFSET_SHIFT].length = end_arrival->match_len;
      pCompressor->match[end_arrival->from_pos << MATCHES_PER_OFFSET_SHIFT].offset = end_arrival->match_offset;
      end_arrival = &arrival[(end_arrival->from_pos << MATCHES_PER_OFFSET_SHIFT) + (end_arrival->from_slot - 1)];
   }
}
/**
* Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
@ -504,7 +627,10 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nResult;
lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
lzsa_optimize_arrivals_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
else
lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nDidReduce;
int nPasses = 0;

View File

@ -1,5 +1,5 @@
/*
* shrink_v1.h - LZSA1 block compressor definitions
* shrink_block_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*

View File

@ -1,5 +1,5 @@
/*
* shrink_v2.c - LZSA2 block compressor implementation
* shrink_block_v2.c - LZSA2 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -36,8 +36,7 @@
#include "shrink_block_v2.h"
#include "format.h"
#include "hashmap.h"
#define HASH_KEY(__nRepMatchOffset,__nNumLiterals,__i) (((unsigned long long)(__nRepMatchOffset)) | (((unsigned long long)(__i)) << 17) | (((unsigned long long)(__nNumLiterals)) << 34))
#include "matchfinder.h"
/**
* Write 4-bit nibble to output (compressed) buffer
@ -178,17 +177,205 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset) {
   lzsa_arrival *arrival = pCompressor->arrival;
   int nMinMatchSize = pCompressor->min_match_size;
   int i, j, n;
   lzsa_match match[32];   /* matches found at the current position; filled by lzsa_find_matches_at() */

   /* Clear all arrival slots for the range to be parsed; slot 0 at the start position acts as the parse root */
   memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));

   arrival[nStartOffset << MATCHES_PER_OFFSET_SHIFT].from_slot = -1;

   for (i = nStartOffset; i != (nEndOffset - 1); i++) {
      int m, nMatches;

      /* For every live arrival at this position, consider emitting one literal byte (8 bits) */
      for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
         int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
         int nCodingChoiceCost = nPrevCost + 8 /* literal */;
         int nNumLiterals = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals + 1;

         /* Crossing a LZSA2 literals run-length boundary costs an extra nibble, byte, or word of encoding */
         if (nNumLiterals == LITERALS_RUN_LEN_V2) {
            nCodingChoiceCost += 4;
         }
         else if (nNumLiterals == (LITERALS_RUN_LEN_V2 + 15)) {
            nCodingChoiceCost += 8;
         }
         else if (nNumLiterals == 256) {
            nCodingChoiceCost += 16;
         }

         int exists = 0;

         /* Skip this choice if a cheaper-or-equal arrival carrying the same rep-match offset already landed at i + 1 */
         for (n = 0;
            n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
            n++) {
            if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) {
               exists = 1;
            }
         }

         for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
            lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n];

            /* Insert in cost order at i + 1, shifting the costlier arrivals down */
            if (pDestArrival->from_slot == 0 ||
               nCodingChoiceCost <= pDestArrival->cost) {
               memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n + 1],
                  &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n],
                  sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));

               pDestArrival->cost = nCodingChoiceCost;
               pDestArrival->from_pos = i;
               pDestArrival->from_slot = j + 1;
               pDestArrival->match_offset = 0;
               pDestArrival->match_len = 0;
               pDestArrival->num_literals = nNumLiterals;
               pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
               break;
            }
         }
      }

      /* Find matches at this position on the fly, then consider each one at every encodable length */
      nMatches = lzsa_find_matches_at(pCompressor, i, match, 32);

      for (m = 0; m < nMatches; m++) {
         int nMatchLen = match[m].length;
         int nMatchOffset = match[m].offset;
         /* Cost in bits of encoding this offset when it is NOT a rep-match (4/8/12/16 bits by offset range) */
         int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
         int nStartingMatchLen, k;
         /* Per-arrival-slot length already verified to also match at that slot's rep offset (see memcmp below) */
         int nRepMatchLenForArrival[NMATCHES_PER_OFFSET];

         for (j = 0; j < NMATCHES_PER_OFFSET; j++)
            nRepMatchLenForArrival[j] = 0;

         /* Clamp the match so it doesn't run into the literals reserved at the end of the block */
         if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
            nMatchLen = nEndOffset - LAST_LITERALS - i;

         /* Very long matches are only considered at full length to bound the work done here */
         if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
            nStartingMatchLen = nMatchLen;
         else
            nStartingMatchLen = nMinMatchSize;

         for (k = nStartingMatchLen; k <= nMatchLen; k++) {
            int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
            lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT];

            for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
               int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
               /* A rep-match (same offset as the arrival's current rep offset) encodes the offset for free */
               int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost;
               int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
               int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
               int exists = 0;

               /* Skip this choice if a cheaper-or-equal arrival with the same rep offset already landed at i + k */
               for (n = 0;
                  n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
                  n++) {
                  if (pDestSlots[n].rep_offset == nMatchOffset) {
                     exists = 1;
                     break;
                  }
               }

               for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
                  lzsa_arrival *pDestArrival = &pDestSlots[n];

                  /* Insert in cost order at i + k, shifting the costlier arrivals down */
                  if (pDestArrival->from_slot == 0 ||
                     nCodingChoiceCost <= pDestArrival->cost) {
                     memmove(&pDestSlots[n + 1],
                        &pDestSlots[n],
                        sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));

                     pDestArrival->cost = nCodingChoiceCost;
                     pDestArrival->from_pos = i;
                     pDestArrival->from_slot = j + 1;
                     pDestArrival->match_offset = nMatchOffset;
                     pDestArrival->match_len = k;
                     pDestArrival->num_literals = 0;
                     pDestArrival->rep_offset = nMatchOffset;
                     break;
                  }
               }

               /* This coding choice doesn't rep-match; see if we still get a match by using the current repmatch offset for this arrival. This can occur (and not have the
                * matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions
                * of identical bytes, for instance. Checking for this provides a big compression win on some files. */

               if (nMatchOffset != nRepOffset &&
                  nRepOffset &&
                  i >= nRepOffset &&
                  k > nRepMatchLenForArrival[j] &&
                  !memcmp(pInWindow + i - nRepOffset + nRepMatchLenForArrival[j], pInWindow + i - nMatchOffset + nRepMatchLenForArrival[j], k - nRepMatchLenForArrival[j])) {
                  nRepMatchLenForArrival[j] = k;

                  /* A match is possible at the rep offset; insert the extra coding choice. */

                  nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
                  nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + /* rep match - no offset cost */ nMatchLenCost;
                  exists = 0;

                  for (n = 0;
                     n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
                     n++) {
                     if (pDestSlots[n].rep_offset == nRepOffset) {
                        exists = 1;
                        break;
                     }
                  }

                  for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
                     lzsa_arrival *pDestArrival = &pDestSlots[n];

                     if (pDestArrival->from_slot == 0 ||
                        nCodingChoiceCost <= pDestArrival->cost) {
                        memmove(&pDestSlots[n + 1],
                           &pDestSlots[n],
                           sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));

                        pDestArrival->cost = nCodingChoiceCost;
                        pDestArrival->from_pos = i;
                        pDestArrival->from_slot = j + 1;
                        pDestArrival->match_offset = nRepOffset;
                        pDestArrival->match_len = k;
                        pDestArrival->num_literals = 0;
                        pDestArrival->rep_offset = nRepOffset;
                        break;
                     }
                  }
               }
            }
         }
      }
   }

   /* Walk the cheapest arrival chain backwards from the end of the block, writing the chosen matches into best_match */
   lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_OFFSET_SHIFT) + 0];
   pCompressor->best_match[i].length = 0;
   pCompressor->best_match[i].offset = 0;

   while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
      pCompressor->best_match[end_arrival->from_pos].length = end_arrival->match_len;
      pCompressor->best_match[end_arrival->from_pos].offset = end_arrival->match_offset;
      end_arrival = &arrival[(end_arrival->from_pos << MATCHES_PER_OFFSET_SHIFT) + (end_arrival->from_slot - 1)];
   }
}
/**
* Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_optimize_backward_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int *prev_match = (int*)pCompressor->intervals; /* Reuse */
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
lzsa_match *pBestMatch = pCompressor->best_match;
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
@ -445,130 +632,95 @@ static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nSt
* impacting the compression ratio
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param pBestMatch optimal matches to evaluate and update
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
int nPreviousMatchOffset = -1;
int nRepMatchOffset = 0;
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
int nMatchLen = pMatch->length;
int nReduce = 0;
int nCurrentMatchOffset = i;
if ((i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) {
int nNextIndex = i + pMatch->length;
int nNextLiterals = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
int nRepMatchSize = (nRepMatchOffset <= 32) ? 4 : ((nRepMatchOffset <= 512) ? 8 : ((nRepMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : nRepMatchSize;
while (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length < MIN_MATCH_SIZE_V2) {
nNextLiterals++;
nNextIndex++;
}
if (pBestMatch[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
if (nNextIndex < nEndOffset && pBestMatch[nNextIndex].length >= MIN_MATCH_SIZE_V2) {
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match */
if (pBestMatch[i + nMatchLen].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
if (nRepMatchOffset && pMatch->offset != nRepMatchOffset && (pBestMatch[nNextIndex].offset != pMatch->offset || pBestMatch[nNextIndex].offset == nRepMatchOffset ||
((pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16))) >=
((pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16))))) {
/* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
* matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the
* ratio (the forward arrivals parser already has this covered). */
if (i >= nRepMatchOffset && !memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length))
pMatch->offset = nRepMatchOffset;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
else {
if (nMatchOffset != nRepMatchOffset &&
pBestMatch[i + nMatchLen].offset == nRepMatchOffset) {
/* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
if (nCommandSize > ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen) - nRepMatchSize)) {
/* Same case, replacing this command by literals alone isn't enough on its own to have savings, however this match command is inbetween two matches with
* identical offsets, while this command has a different match offset. Replacing it with literals allows to use a rep-match for the two commands around it, and
* that is enough for some savings. Replace. */
nReduce = 1;
}
int nCurCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
if (pMatch->offset != nRepMatchOffset)
nCurCommandSize += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
/* Calculate the next command's current cost */
int nNextCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
if (pBestMatch[nNextIndex].offset != pMatch->offset)
nNextCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
/* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */
int nReducedCommandSize = (pMatch->length << 3) + 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals + pMatch->length + nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
if (pBestMatch[nNextIndex].offset != nRepMatchOffset)
nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
/* Reduce */
int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
pBestMatch[i + j].length = 0;
}
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pBestMatch[nCurIndex].length < MIN_MATCH_SIZE_V2);
if (nCurIndex >= nEndOffset || pBestMatch[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pBestMatch[nCurIndex].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
else {
if (nCurIndex < nEndOffset && pBestMatch[nCurIndex].length >= MIN_MATCH_SIZE_V2 &&
pBestMatch[nCurIndex].offset != nMatchOffset &&
pBestMatch[nCurIndex].offset == nRepMatchOffset) {
if (nCommandSize > ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals) - nRepMatchSize)) {
/* Same case, but now replacing this command allows to use a rep-match and get savings, so do it */
nReduce = 1;
}
}
nDidReduce = 1;
continue;
}
}
}
if (nReduce) {
int j;
if ((i + pMatch->length) < nEndOffset && pMatch->length >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pBestMatch[i + pMatch->length].offset == pMatch->offset && (pMatch->length % pMatch->offset) == 0 &&
(pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN) {
int nMatchLen = pMatch->length;
for (j = 0; j < nMatchLen; j++) {
pBestMatch[i + j].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
/* Join */
nDidReduce = 1;
if (nPreviousMatchOffset >= 0) {
repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
nPreviousMatchOffset = -1;
}
}
else {
if (pMatch->length)
nRepMatchOffset = pMatch->offset;
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pBestMatch[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pBestMatch[i + nMatchLen].length) <= MAX_VARLEN) {
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
continue;
}
nNumLiterals = 0;
i += nMatchLen;
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
continue;
}
nPreviousMatchOffset = nCurrentMatchOffset;
nRepMatchOffset = pMatch->offset;
i += pMatch->length;
nNumLiterals = 0;
}
else {
nNumLiterals++;
@ -579,246 +731,6 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, lzsa_mat
return nDidReduce;
}
/**
 * Get cost of the best encoding choice at a given offset, going forward.
 *
 * Recursively evaluates "emit one literal" against every stored match candidate
 * at this position, memoizing results in the compressor's cost hashmap keyed on
 * (rep-match offset, pending literal count, position).
 *
 * @param pCompressor compression context
 * @param i offset in input window
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param nNumLiterals current pending number of literals to be encoded with the next token
 * @param nRepMatchOffset current rep-match offset
 * @param nDepth current recursion depth
 * @param pBestMatchLen pointer to returned best match length for the position (0 for no match)
 * @param pBestMatchOffset pointer to returned best match offset for the position (if there is a match)
 *
 * @return cost of best encoding choice for offset, in bits
 */
static int lzsa_get_forward_cost_v2(lzsa_compressor *pCompressor, const int i, const int nEndOffset, const int nNumLiterals, const int nRepMatchOffset, int nDepth, int *pBestMatchLen, int *pBestMatchOffset) {
/* Past the end of the window: nothing left to encode, zero cost */
if (i >= nEndOffset)
return 0;
int *cost = (int*)pCompressor->pos_data; /* Reuse */
/* Recursion depth exhausted: fall back to the per-position cost precomputed by the earlier (backward) pass */
if (nDepth >= pCompressor->max_forward_depth)
return cost[i];
if (nDepth >= 1) {
unsigned int nValue = 0;
/* Memoization lookup. NOTE(review): the cached value is returned when
 * lzsa_hashmap_find() returns zero — this assumes zero means "found";
 * verify against the hashmap implementation (inverted convention would
 * defeat the memoization and return 0 for cache misses). */
if (!lzsa_hashmap_find(&pCompressor->cost_map, HASH_KEY(nRepMatchOffset, nNumLiterals, i), &nValue))
return nValue;
}
int nMinMatchSize = pCompressor->min_match_size;
int m;
/* Match candidates for this position, NMATCHES_PER_OFFSET slots per offset */
const lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int nBestCost, nBestMatchLen, nBestMatchOffset, nTmpMatchLen, nTmpMatchOffset;
int nLiteralsCost;
/* Baseline choice: emit one literal (8 bits) here and recurse with one more pending literal */
nBestCost = 8 + lzsa_get_forward_cost_v2(pCompressor, i + 1, nEndOffset, nNumLiterals + 1, nRepMatchOffset, nDepth + 1, &nTmpMatchLen, &nTmpMatchOffset);
nBestMatchLen = 0;
nBestMatchOffset = 0;
/* Cost of the variable-length literal run header for the currently pending literals */
nLiteralsCost = lzsa_get_literals_varlen_size_v2(nNumLiterals);
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
if (pMatch[m].length > 30) {
/* Long match: only evaluate it at its full length — presumably to avoid
 * an O(length) inner loop; TODO confirm the 30 threshold rationale */
int nCurCost;
int nMatchLen = pMatch[m].length;
/* Clamp so the match leaves room for the trailing literals the format requires */
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
int nMatchOffsetSize;
/* Rep-match reuses the previous offset for free; otherwise the offset
 * costs 4/8/12/16 extra bits depending on its magnitude */
if (nRepMatchOffset == pMatch[m].offset)
nMatchOffsetSize = 0;
else {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
}
nCurCost = 8 + nLiteralsCost + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
nCurCost += lzsa_get_forward_cost_v2(pCompressor, i + nMatchLen, nEndOffset, 0, pMatch[m].offset, nDepth + 1, &nTmpMatchLen, &nTmpMatchOffset);
/* '>=' ties favor the match over the literal baseline */
if (nBestCost >= nCurCost) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
}
else {
/* Short match: additionally try every truncated length k, since a shorter
 * match can enable a cheaper continuation */
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V2)
nMatchRunLen = MATCH_RUN_LEN_V2;
/* Lengths below MATCH_RUN_LEN_V2 fit entirely in the token: no extra match-length bytes */
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
int nMatchOffsetSize;
if (nRepMatchOffset == pMatch[m].offset)
nMatchOffsetSize = 0;
else {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
}
nCurCost = 8 + nLiteralsCost + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += lzsa_get_forward_cost_v2(pCompressor, i + k, nEndOffset, 0, pMatch[m].offset, nDepth + 1, &nTmpMatchLen, &nTmpMatchOffset);
if (nBestCost >= nCurCost) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
/* Lengths from MATCH_RUN_LEN_V2 up to the full match length need a var-len match size field */
for (; k <= nMatchLen; k++) {
int nCurCost;
int nMatchOffsetSize;
if (nRepMatchOffset == pMatch[m].offset)
nMatchOffsetSize = 0;
else {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
}
nCurCost = 8 + nLiteralsCost + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
nCurCost += lzsa_get_forward_cost_v2(pCompressor, i + k, nEndOffset, 0, pMatch[m].offset, nDepth + 1, &nTmpMatchLen, &nTmpMatchOffset);
if (nBestCost >= nCurCost) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
}
}
*pBestMatchLen = nBestMatchLen;
*pBestMatchOffset = nBestMatchOffset;
/* Memoize the result for this (rep offset, literal count, position) state */
lzsa_hashmap_insert(&pCompressor->cost_map, HASH_KEY(nRepMatchOffset, nNumLiterals, i), nBestCost);
return nBestCost;
}
/**
 * Attempt to further improve the selected optimal matches with a chain-N forward parser pass.
 *
 * For each position, asks the recursive forward cost evaluator for a better
 * encoding and, when it beats the precomputed cost for that position, records
 * the forward parser's choice in the improved_match array.
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   int *pPassCost = (int*)pCompressor->pos_data; /* Reuse: per-position costs from the earlier pass */
   int nPendingLiterals = 0;
   int nCurRepOffset = 0;
   int nPos = nStartOffset;

   lzsa_hashmap_init(&pCompressor->cost_map);

   while (nPos < nEndOffset) {
      int nFwdMatchLen = 0, nFwdMatchOffset = 0;
      int nFwdCost = lzsa_get_forward_cost_v2(pCompressor, nPos, nEndOffset, nPendingLiterals, nCurRepOffset, 0, &nFwdMatchLen, &nFwdMatchOffset);

      /* The memoization cache is only valid for one starting state; drop it between positions */
      lzsa_hashmap_clear(&pCompressor->cost_map);

      lzsa_match *pImproved = pCompressor->improved_match + nPos;
      if (nFwdCost < pPassCost[nPos]) {
         /* Forward parse found a strictly cheaper encoding here: adopt it */
         pImproved->length = nFwdMatchLen;
         pImproved->offset = nFwdMatchOffset;
      }

      if (pImproved->length < MIN_MATCH_SIZE_V2) {
         /* No usable match at this position: one more pending literal */
         nPendingLiterals++;
         nPos++;
      }
      else {
         /* Take the match: pending literals are flushed with this command
          * and its offset becomes the new rep-match offset */
         nPendingLiterals = 0;
         nCurRepOffset = pImproved->offset;
         nPos += pImproved->length;
      }
   }

   lzsa_hashmap_clear(&pCompressor->cost_map);
}
/**
 * Calculate compressed size.
 *
 * Walks the chosen command sequence and sums the size, in bits, of every
 * command that would be emitted: token, literal run header, literal bytes,
 * match offset and match length fields, plus the raw-block footer if needed.
 *
 * @param pCompressor compression context
 * @param pBestMatch optimal matches to evaluate
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return compressed size, in bits
 */
static int lzsa_get_compressed_size_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
   int i;
   int nNumLiterals = 0;
   int nCompressedSize = 0;
   int nRepMatchOffset = 0;

   for (i = nStartOffset; i < nEndOffset; ) {
      const lzsa_match *pMatch = pBestMatch + i;

      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
         int nMatchOffset = pMatch->offset;
         int nMatchLen = pMatch->length;
         int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
         int nOffsetSize;

         if (nMatchOffset == nRepMatchOffset) {
            /* Rep-match: the previous offset is reused, costing no offset bits */
            nOffsetSize = 0;
         }
         else {
            /* Tiered LZSA2 offset encodings: 4, 8, 12 or 16 extra bits by magnitude */
            if (nMatchOffset <= 32) {
               nOffsetSize = 4;
            }
            else if (nMatchOffset <= 512) {
               nOffsetSize = 8;
            }
            else if (nMatchOffset <= (8192 + 512)) {
               nOffsetSize = 12;
            }
            else {
               nOffsetSize = 16;
            }
         }

         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
         nCompressedSize += nCommandSize;
         nNumLiterals = 0;
         nRepMatchOffset = nMatchOffset;
         i += nMatchLen;
      }
      else {
         nNumLiterals++;
         i++;
      }
   }

   {
      /* Final literals-only command flushing any trailing literals.
       * (Removed an unused local that duplicated the token literal-run
       * computation, and a dead store resetting nNumLiterals at the end.) */
      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);
      nCompressedSize += nCommandSize;
   }

   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
      /* Raw blocks carry an end-of-data footer: 8 + 4 + 8 bits (token,
       * nibble, byte — presumably the EOD marker; see the block writer) */
      nCompressedSize += 8 + 4 + 8;
   }

   return nCompressedSize;
}
/**
* Emit block of compressed data
*
@ -1057,44 +969,19 @@ static int lzsa_write_raw_uncompressed_block_v2(lzsa_compressor *pCompressor, co
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int nResult;
lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_match *pBestMatch;
if (pCompressor->max_forward_depth > 0) {
memcpy(pCompressor->improved_match, pCompressor->best_match, nInDataSize * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
else
lzsa_optimize_backward_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pCompressor->best_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pInWindow, pCompressor->best_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pCompressor->improved_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
int nBestCost = lzsa_get_compressed_size_v2(pCompressor, pCompressor->best_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nImprovedCost = lzsa_get_compressed_size_v2(pCompressor, pCompressor->improved_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (nBestCost > nImprovedCost)
pBestMatch = pCompressor->improved_match;
else
pBestMatch = pCompressor->best_match;
}
else {
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pCompressor->best_match, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
pBestMatch = pCompressor->best_match;
}
nResult = lzsa_write_block_v2(pCompressor, pBestMatch, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
nResult = lzsa_write_block_v2(pCompressor, pCompressor->best_match, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
if (nResult < 0 && pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
nResult = lzsa_write_raw_uncompressed_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}

View File

@ -1,5 +1,5 @@
/*
* shrink_v2.h - LZSA2 block compressor definitions
* shrink_block_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*

View File

@ -61,15 +61,14 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->match = NULL;
pCompressor->selected_match = NULL;
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->slot_cost = NULL;
pCompressor->repmatch_opt = NULL;
pCompressor->arrival = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
else if (pCompressor->min_match_size > nMaxMinMatchForFormat)
pCompressor->min_match_size = nMaxMinMatchForFormat;
pCompressor->max_forward_depth = 0;
pCompressor->format_version = nFormatVersion;
pCompressor->flags = nFlags;
pCompressor->safe_dist = 0;
@ -82,36 +81,42 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->selected_match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
pCompressor->arrival = (lzsa_arrival *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_arrival));
if (pCompressor->selected_match) {
if (pCompressor->arrival || (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
if (pCompressor->format_version == 2) {
pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
if (pCompressor->best_match) {
pCompressor->improved_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
pCompressor->selected_match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->improved_match) {
pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
if (pCompressor->selected_match) {
pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
if (pCompressor->slot_cost) {
pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
if (pCompressor->slot_cost) {
pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
if (pCompressor->repmatch_opt)
return 0;
if (pCompressor->repmatch_opt)
return 0;
}
}
}
else {
return 0;
}
}
}
}
else {
return 0;
else {
return 0;
}
}
}
}
@ -131,6 +136,11 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->arrival) {
free(pCompressor->arrival);
pCompressor->arrival = NULL;
}
if (pCompressor->repmatch_opt) {
free(pCompressor->repmatch_opt);
pCompressor->repmatch_opt = NULL;
@ -141,11 +151,6 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
pCompressor->slot_cost = NULL;
}
if (pCompressor->improved_match) {
free(pCompressor->improved_match);
pCompressor->improved_match = NULL;
}
if (pCompressor->best_match) {
free(pCompressor->best_match);
pCompressor->best_match = NULL;
@ -202,7 +207,8 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pI
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0 || pCompressor->format_version < 2)
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->format_version == 1) {
nCompressedSize = lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);

View File

@ -41,7 +41,9 @@ extern "C" {
#endif
#define LCP_BITS 14
#define LCP_MAX (1U<<(LCP_BITS - 1))
#define TAG_BITS 3
#define LCP_MAX (1U<<(LCP_BITS - TAG_BITS - 1))
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (31-LCP_BITS)
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
@ -71,6 +73,19 @@ typedef struct _lzsa_repmatch_opt {
short expected_repmatch;
} lzsa_repmatch_opt;
/** Forward arrival slot */
typedef struct {
int cost;
int from_pos;
short from_slot;
unsigned short rep_offset;
int num_literals;
unsigned short match_offset;
unsigned short match_len;
} lzsa_arrival;
/** Compression context */
typedef struct _lzsa_compressor {
divsufsort_ctx_t divsufsort_context;
@ -80,11 +95,10 @@ typedef struct _lzsa_compressor {
lzsa_match *match;
lzsa_match *selected_match;
lzsa_match *best_match;
lzsa_match *improved_match;
int *slot_cost;
lzsa_repmatch_opt *repmatch_opt;
lzsa_arrival *arrival;
int min_match_size;
int max_forward_depth;
int format_version;
int flags;
int safe_dist;

View File

@ -84,21 +84,6 @@ size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer,
}
}
if ((compressor.flags & LZSA_FLAG_FAVOR_RATIO)) {
if (nInputSize < 16384)
compressor.max_forward_depth = 25;
else {
if (nInputSize < 32768)
compressor.max_forward_depth = 15;
else {
if (nInputSize < BLOCK_SIZE)
compressor.max_forward_depth = 10;
else
compressor.max_forward_depth = 0;
}
}
}
int nPreviousBlockSize = 0;
int nNumBlocks = 0;

View File

@ -70,6 +70,7 @@ static void lzsa_delete_file(const char *pszInFilename) {
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
@ -127,6 +128,7 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
@ -200,21 +202,6 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
}
nDictionaryDataSize = 0;
if (nNumBlocks == 0 && (compressor.flags & LZSA_FLAG_FAVOR_RATIO)) {
if (nInDataSize < 16384)
compressor.max_forward_depth = 25;
else {
if (nInDataSize < 32768)
compressor.max_forward_depth = 15;
else {
if (nInDataSize < BLOCK_SIZE)
compressor.max_forward_depth = 10;
else
compressor.max_forward_depth = 0;
}
}
}
int nOutDataSize;
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, ((nInDataSize + nRawPadding) >= BLOCK_SIZE) ? BLOCK_SIZE : (nInDataSize + nRawPadding));

View File

@ -57,6 +57,7 @@ typedef enum _lzsa_status_t lzsa_status_t;
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
@ -80,6 +81,7 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/