Check in LZSA2 implementation (ratio competitive with ZX7, faster decompression)

This commit is contained in:
emmanuel-marty 2019-05-09 16:51:29 +02:00
parent 49b0739050
commit 8b7b4a2b4f
26 changed files with 2982 additions and 936 deletions

View File

@ -1,3 +1,3 @@
The LZSA code is available under the Zlib license, except for src/shrink.c which is placed under the Creative Commons CC0 license. The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information. Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information.

View File

@ -10,10 +10,14 @@ $(OBJDIR)/%.o: src/../%.c
APP := lzsa APP := lzsa
OBJS := $(OBJDIR)/src/main.o OBJS := $(OBJDIR)/src/lzsa.o
OBJS += $(OBJDIR)/src/frame.o OBJS += $(OBJDIR)/src/frame.o
OBJS += $(OBJDIR)/src/shrink.o OBJS += $(OBJDIR)/src/lib.o
OBJS += $(OBJDIR)/src/expand.o OBJS += $(OBJDIR)/src/matchfinder.o
OBJS += $(OBJDIR)/src/shrink_v1.o
OBJS += $(OBJDIR)/src/shrink_v2.o
OBJS += $(OBJDIR)/src/expand_v1.o
OBJS += $(OBJDIR)/src/expand_v2.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o

View File

@ -40,7 +40,7 @@ Inspirations:
License: License:
* The LZSA code is available under the Zlib license. * The LZSA code is available under the Zlib license.
* The compressor (shrink.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder. * The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder.
# Stream format # Stream format

View File

@ -177,24 +177,32 @@
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClInclude Include="..\src\expand.h" /> <ClInclude Include="..\src\expand_v1.h" />
<ClInclude Include="..\src\expand_v2.h" />
<ClInclude Include="..\src\format.h" /> <ClInclude Include="..\src\format.h" />
<ClInclude Include="..\src\frame.h" /> <ClInclude Include="..\src\frame.h" />
<ClInclude Include="..\src\lib.h" />
<ClInclude Include="..\src\libdivsufsort\include\config.h" /> <ClInclude Include="..\src\libdivsufsort\include\config.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" /> <ClInclude Include="..\src\libdivsufsort\include\divsufsort.h" />
<ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" /> <ClInclude Include="..\src\libdivsufsort\include\divsufsort_private.h" />
<ClInclude Include="..\src\shrink.h" /> <ClInclude Include="..\src\matchfinder.h" />
<ClInclude Include="..\src\shrink_v1.h" />
<ClInclude Include="..\src\shrink_v2.h" />
<ClInclude Include="pch.h" /> <ClInclude Include="pch.h" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\src\expand.c" /> <ClCompile Include="..\src\expand_v1.c" />
<ClCompile Include="..\src\expand_v2.c" />
<ClCompile Include="..\src\frame.c" /> <ClCompile Include="..\src\frame.c" />
<ClCompile Include="..\src\lib.c" />
<ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" /> <ClCompile Include="..\src\libdivsufsort\lib\divsufsort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\sssort.c" /> <ClCompile Include="..\src\libdivsufsort\lib\sssort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\trsort.c" /> <ClCompile Include="..\src\libdivsufsort\lib\trsort.c" />
<ClCompile Include="..\src\libdivsufsort\lib\utils.c" /> <ClCompile Include="..\src\libdivsufsort\lib\utils.c" />
<ClCompile Include="..\src\main.c" /> <ClCompile Include="..\src\lzsa.c" />
<ClCompile Include="..\src\shrink.c" /> <ClCompile Include="..\src\matchfinder.c" />
<ClCompile Include="..\src\shrink_v1.c" />
<ClCompile Include="..\src\shrink_v2.c" />
</ItemGroup> </ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets"> <ImportGroup Label="ExtensionTargets">

View File

@ -27,15 +27,9 @@
<ClInclude Include="pch.h"> <ClInclude Include="pch.h">
<Filter>Fichiers d%27en-tête</Filter> <Filter>Fichiers d%27en-tête</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\shrink.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\format.h"> <ClInclude Include="..\src\format.h">
<Filter>Fichiers sources</Filter> <Filter>Fichiers sources</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\expand.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\libdivsufsort\include\config.h"> <ClInclude Include="..\src\libdivsufsort\include\config.h">
<Filter>Fichiers sources\libdivsufsort\include</Filter> <Filter>Fichiers sources\libdivsufsort\include</Filter>
</ClInclude> </ClInclude>
@ -48,17 +42,26 @@
<ClInclude Include="..\src\frame.h"> <ClInclude Include="..\src\frame.h">
<Filter>Fichiers sources</Filter> <Filter>Fichiers sources</Filter>
</ClInclude> </ClInclude>
<ClInclude Include="..\src\expand_v2.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_v2.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\expand_v1.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\shrink_v1.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\matchfinder.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\lib.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="..\src\main.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\libdivsufsort\lib\utils.c"> <ClCompile Include="..\src\libdivsufsort\lib\utils.c">
<Filter>Fichiers sources\libdivsufsort\lib</Filter> <Filter>Fichiers sources\libdivsufsort\lib</Filter>
</ClCompile> </ClCompile>
@ -74,5 +77,26 @@
<ClCompile Include="..\src\frame.c"> <ClCompile Include="..\src\frame.c">
<Filter>Fichiers sources</Filter> <Filter>Fichiers sources</Filter>
</ClCompile> </ClCompile>
<ClCompile Include="..\src\expand_v2.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_v2.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\expand_v1.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\shrink_v1.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\matchfinder.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\lib.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\lzsa.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -1,5 +1,5 @@
; ----------------------------------------------------------------------------- ; -----------------------------------------------------------------------------
; Decompress raw LZSA block. Create one with lzsa -r <original_file> <compressed_file> ; Decompress raw LZSA1 block. Create one with lzsa -r <original_file> <compressed_file>
; ;
; in: ; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
@ -31,7 +31,7 @@
OFFSLO = $43 ; zero-page location for temp offset OFFSLO = $43 ; zero-page location for temp offset
OFFSHI = $44 OFFSHI = $44
DECOMPRESS_LZSA DECOMPRESS_LZSA1
LDY #$00 LDY #$00
DECODE_TOKEN DECODE_TOKEN

245
asm/6502/decompress_v2.asm Executable file
View File

@ -0,0 +1,245 @@
; -----------------------------------------------------------------------------
; Decompress raw LZSA2 block.
; Create one with lzsa -r -f2 <original_file> <compressed_file>
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
;
; out:
; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
; -----------------------------------------------------------------------------
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
; -----------------------------------------------------------------------------
OFFSLO = $43 ; zero-page location for temp offset
OFFSHI = $44
FIXUP = $4B
NIBBLES = $FB
NIBCOUNT = $FC
DECOMPRESS_LZSA2
LDY #$00
STY NIBBLES
STY NIBCOUNT
DECODE_TOKEN
JSR GETSRC ; read token byte: XYZ|LL|MMM
PHA ; preserve token on stack
AND #$18 ; isolate literals count (LL)
BEQ NO_LITERALS ; skip if no literals to copy
CMP #$18 ; LITERALS_RUN_LEN_V2 << 3?
BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
CLC ; add nibble to len from token
ADC #$03 ; (LITERALS_RUN_LEN_V2)
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BNE PREPARE_COPY_LITERALS ; if less, literals count is complete
JSR GETSRC ; get extra byte of variable literals count
TAX ; non-zero?
BNE PREPARE_COPY_LITERALS_HIGH ; if so, literals count is complete
; handle 16 bits literals count
; literals count = directly these 16 bits
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
JMP PREPARE_COPY_LITERALS_HIGH
EMBEDDED_LITERALS
LSR A ; shift literals count into place
LSR A
LSR A
PREPARE_COPY_LITERALS
TAX
PREPARE_COPY_LITERALS_HIGH
INY
COPY_LITERALS
JSR GETPUT ; copy one byte of literals
DEX
BNE COPY_LITERALS
DEY
BNE COPY_LITERALS
NO_LITERALS
PLA ; retrieve token from stack
PHA ; preserve token again
BMI REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
ASL ; 0YZ: 5 or 9 bit offset
BMI OFFSET_9_BIT
; 00Z: 5 bit offset
LSR A ; Shift Z (offset bit 4) in place
LSR A
AND #$10
STA FIXUP
JSR GETNIBBLE ; get nibble for offset bits 0-3
ORA FIXUP ; merge offset bit 4
ORA #$E0 ; set offset bits 7-5 to 1
TAX ; store low byte of match offset
LDA #$0FF ; set offset bits 15-8 to 1
BNE GOT_OFFSET ; (*same as JMP GOT_OFFSET but shorter)
OFFSET_9_BIT ; 01Z: 9 bit offset
ASL ; shift Z (offset bit 8) in place
ROL
ROL
ORA #$FE ; set offset bits 15-9 to 1
STA OFFSHI
JSR GETSRC ; get offset bits 0-7 from stream in A
TAX ; store low byte of match offset
JMP GOT_OFFSET_LO ; go prepare match
REPMATCH_OR_LARGE_OFFSET
ASL ; 13 bit offset?
BMI REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
; 10Z: 13 bit offset
LSR A ; shift Z (offset bit 4) in place
LSR A
AND #$10
STA FIXUP
JSR GETSRC ; get offset bits 0-7 from stream in A
TAX ; store low byte of match offset
JSR GETNIBBLE ; get nibble for offset bits 8-11
ORA FIXUP ; merge offset bit 12
CLC
ADC #$DE ; set bits 13-15 to 1 and substract 2 (to substract 512)
BNE GOT_OFFSET ; go prepare match (*same as JMP GOT_OFFSET but shorter)
REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
ASL ; XYZ=111?
BMI REP_MATCH ; reuse previous offset if so (rep-match)
; 110: handle 16 bit offset
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
GOT_OFFSET
STA OFFSHI ; store final match offset
GOT_OFFSET_LO
STX OFFSLO
REP_MATCH
CLC ; add dest + match offset
LDA PUTDST+1 ; low 8 bits
ADC OFFSLO
STA COPY_MATCH_LOOP+1 ; store back reference address
LDA OFFSHI ; high 8 bits
ADC PUTDST+2
STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
PLA ; retrieve token from stack again
AND #$07 ; isolate match len (MMM)
CLC
ADC #$02 ; add MIN_MATCH_SIZE_V2
CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
CLC ; add nibble to len from token
ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2)
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BNE PREPARE_COPY_MATCH ; if less, match length is complete
JSR GETSRC ; get extra byte of variable match length
TAX ; non-zero?
BNE PREPARE_COPY_MATCH_Y ; if so, the match length is complete
; Handle 16 bits match length
JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
TAY ; put high 8 bits in Y
; large match length with zero high byte?
BEQ DECOMPRESSION_DONE ; if so, this is the EOD code, bail
TXA
PREPARE_COPY_MATCH
TAX
PREPARE_COPY_MATCH_Y
INY
COPY_MATCH_LOOP
LDA $AAAA ; get one byte of backreference
INC COPY_MATCH_LOOP+1
BNE GETMATCH_DONE
INC COPY_MATCH_LOOP+2
GETMATCH_DONE
JSR PUTDST ; copy to destination
DEX
BNE COPY_MATCH_LOOP
DEY
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
GETNIBBLE
DEC NIBCOUNT
BPL HAS_NIBBLES
LDA #$01
STA NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR A
LSR A
LSR A
LSR A
RTS
HAS_NIBBLES
LDA NIBBLES
AND #$0F ; isolate low 4 bits of nibble
RTS
GETPUT
JSR GETSRC
PUTDST
LZSA_DST_LO = *+1
LZSA_DST_HI = *+2
STA $AAAA
INC PUTDST+1
BNE PUTDST_DONE
INC PUTDST+2
PUTDST_DONE
DECOMPRESSION_DONE
RTS
GETLARGESRC
JSR GETSRC ; grab low 8 bits
TAX ; move to X
; fall through grab high 8 bits
GETSRC
LZSA_SRC_LO = *+1
LZSA_SRC_HI = *+2
LDA $AAAA
INC GETSRC+1
BNE GETSRC_DONE
INC GETSRC+2
GETSRC_DONE
RTS

View File

@ -22,15 +22,15 @@
bits 16 bits 16
; --------------------------------------------------------------------------- ; ---------------------------------------------------------------------------
; Decompress raw LZSA block ; Decompress raw LZSA1 block
; inputs: ; inputs:
; * ds:si: raw LZSA block ; * ds:si: raw LZSA1 block
; * es:di: output buffer ; * es:di: output buffer
; output: ; output:
; * ax: decompressed size ; * ax: decompressed size
; --------------------------------------------------------------------------- ; ---------------------------------------------------------------------------
lzsa_decompress: lzsa1_decompress:
push di ; remember decompression offset push di ; remember decompression offset
cld ; make string operations (lods, movs, stos..) move forward cld ; make string operations (lods, movs, stos..) move forward

174
asm/8088/decompress_small_v2.S Executable file
View File

@ -0,0 +1,174 @@
; decompress_small.S - space-efficient decompressor implementation for 8088
;
; Copyright (C) 2019 Emmanuel Marty
;
; This software is provided 'as-is', without any express or implied
; warranty. In no event will the authors be held liable for any damages
; arising from the use of this software.
;
; Permission is granted to anyone to use this software for any purpose,
; including commercial applications, and to alter it and redistribute it
; freely, subject to the following restrictions:
;
; 1. The origin of this software must not be misrepresented; you must not
; claim that you wrote the original software. If you use this software
; in a product, an acknowledgment in the product documentation would be
; appreciated but is not required.
; 2. Altered source versions must be plainly marked as such, and must not be
; misrepresented as being the original software.
; 3. This notice may not be removed or altered from any source distribution.
segment .text
bits 16
; ---------------------------------------------------------------------------
; Decompress raw LZSA2 block
; inputs:
; * ds:si: raw LZSA2 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
lzsa2_decompress:
push di ; remember decompression offset
cld ; make string operations (lods, movs, stos..) move forward
xor cx,cx
xor bx,bx
xor bp,bp
.decode_token:
mov ax,cx ; clear ah - cx is zero from above or from after rep movsb in .copy_match
lodsb ; read token byte: XYZ|LL|MMMM
mov dx,ax ; keep token in dl
and al,018H ; isolate literals length in token (LL)
mov cl,3
shr al,cl ; shift literals length into place
cmp al,03H ; LITERALS_RUN_LEN_V2?
jne .got_literals ; no, we have the full literals count from the token, go copy
call .get_nibble ; get extra literals length nibble
add al,cl ; add len from token to nibble
cmp al,012H ; LITERALS_RUN_LEN_V2 + 15 ?
jne .got_literals ; if not, we have the full literals count, go copy
lodsb ; grab extra length byte
test al,al ; zero?
jne .got_literals ; if not, we have the full literals count, go copy
lodsw ; grab 16-bit extra length
.got_literals:
xchg cx,ax
rep movsb ; copy cx literals from ds:si to es:di
test dl,dl ; check match offset mode in token (X bit)
js .rep_match_or_large_offset
cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit)
jnb .offset_9_bit
; 5 bit offset
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
mov al,020H ; shift Z (offset bit 4) in place
and al,dl
shr al,1
call .get_nibble ; get nibble for offset bits 0-3
or al,cl ; merge nibble
or al,0E0H ; set offset bits 7-5 to 1
dec ah ; set offset bits 15-8 to 1
jmp short .get_match_length
.offset_9_bit: ; 9 bit offset
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
lodsb ; get 8 bit offset from stream in A
dec ah ; set offset bits 15-8 to 1
test dl,020H ; test bit Z (offset bit 8)
jne .get_match_length
dec ah ; clear bit 8 if Z bit is clear
jmp short .get_match_length
.rep_match_or_large_offset:
cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
jnb .rep_match_or_16_bit
; 13 bit offset
lodsb ; load match offset bits 0-7
mov ah,020H ; shift Z (offset bit 12) in place
and ah,dl
shr ah,1
call .get_nibble ; get nibble for offset bits 8-11
or ah,cl ; merge nibble
or ah,0E0H ; set offset bits 15-13 to 1
sub ah,2 ; substract 512
jmp short .get_match_length
.rep_match_or_16_bit:
test dl,020H ; test bit Z (offset bit 8)
jne .repeat_match ; rep-match
; 16 bit offset
lodsw ; Get 2-byte match offset
.get_match_length:
mov bp,ax ; bp: offset
.repeat_match:
mov ax,dx ; ax: original token
and al,07H ; isolate match length in token (MMM)
add al,2 ; add MIN_MATCH_SIZE_V2
cmp al,09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
jne .got_matchlen ; no, we have the full match length from the token, go copy
call .get_nibble ; get extra literals length nibble
add al,cl ; add len from token to nibble
cmp al,018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
jne .got_matchlen ; no, we have the full match length from the token, go copy
lodsb ; grab extra length byte
test al,al ; zero?
jne .got_matchlen ; if not, we have the entire length
lodsw ; grab 16-bit length
test ax,ax ; bail if we hit EOD
je short .done_decompressing
.got_matchlen:
xchg cx,ax ; copy match length into cx
push ds ; save ds:si (current pointer to compressed data)
xchg si,ax
push es
pop ds
mov si,di ; ds:si now points at back reference in output data
add si,bp
rep movsb ; copy match
xchg si,ax ; restore ds:si
pop ds
jmp .decode_token ; go decode another token
.done_decompressing:
pop ax ; retrieve the original decompression offset
xchg ax,di ; compute decompressed size
sub ax,di
ret ; done
.get_nibble:
dec bh ; nibble ready?
jns .has_nibble
mov cx,ax
lodsb ; load two nibbles
mov bl,al
mov bh,1
mov ax,cx
.has_nibble:
mov cl,4 ; swap 4 high and low bits of nibble
ror bl,cl
mov cl,0FH
and cl,bl
ret

56
src/expand.c → src/expand_v1.c Executable file → Normal file
View File

@ -1,5 +1,5 @@
/* /*
* expand.c - block decompressor implementation * expand_v1.c - LZSA1 block decompressor implementation
* *
* Copyright (C) 2019 Emmanuel Marty * Copyright (C) 2019 Emmanuel Marty
* *
@ -20,11 +20,21 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include "format.h" #include "format.h"
#include "expand.h" #include "expand_v1.h"
#ifdef _MSC_VER #ifdef _MSC_VER
#define FORCE_INLINE __forceinline #define FORCE_INLINE __forceinline
@ -32,11 +42,11 @@
#define FORCE_INLINE __attribute__((always_inline)) #define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */ #endif /* _MSC_VER */
static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { static inline FORCE_INLINE int lzsa_expand_literals_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
const unsigned char *pInBlock = *ppInBlock; const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData; unsigned char *pCurOutData = *ppCurOutData;
if (nLiterals == LITERALS_RUN_LEN) { if (nLiterals == LITERALS_RUN_LEN_V1) {
unsigned char nByte; unsigned char nByte;
if (pInBlock < pInBlockEnd) { if (pInBlock < pInBlockEnd) {
@ -83,12 +93,12 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
return 0; return 0;
} }
static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
const unsigned char *pInBlock = *ppInBlock; const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData; unsigned char *pCurOutData = *ppCurOutData;
nMatchLen += MIN_MATCH_SIZE; nMatchLen += MIN_MATCH_SIZE_V1;
if (nMatchLen == (MATCH_RUN_LEN + MIN_MATCH_SIZE)) { if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
unsigned char nByte; unsigned char nByte;
if (pInBlock < pInBlockEnd) { if (pInBlock < pInBlockEnd) {
@ -159,7 +169,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
} }
/** /**
* Decompress one data block * Decompress one LZSA1 data block
* *
* @param pInBlock pointer to compressed data * @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes * @param nInBlockSize size of compressed data, in bytes
@ -169,7 +179,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
* *
* @return size of decompressed data in bytes, or -1 for error * @return size of decompressed data in bytes, or -1 for error
*/ */
int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize; const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset; unsigned char *pCurOutData = pOutData + nOutDataOffset;
@ -182,36 +192,35 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
const unsigned char token = *pInBlock++; const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (nLiterals < LITERALS_RUN_LEN) { if (nLiterals < LITERALS_RUN_LEN_V1) {
memcpy(pCurOutData, pInBlock, 8); memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals; pInBlock += nLiterals;
pCurOutData += nLiterals; pCurOutData += nLiterals;
} }
else { else {
if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1; return -1;
} }
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset; int nMatchOffset;
nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff)); nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
if (token & 0x80) { if (token & 0x80) {
nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8); nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
} }
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset; const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) { if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f); unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (nMatchLen < MATCH_RUN_LEN && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { if (nMatchLen < MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 8); memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8); memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 4); memcpy(pCurOutData + 16, pSrc + 16, 4);
pCurOutData += (MIN_MATCH_SIZE + nMatchLen); pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
} }
else { else {
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1; return -1;
} }
} }
@ -227,22 +236,21 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
const unsigned char token = *pInBlock++; const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1; return -1;
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset; int nMatchOffset;
nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff)); nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
if (token & 0x80) { if (token & 0x80) {
nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8); nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
} }
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset; const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) { if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f); unsigned int nMatchLen = (unsigned int)(token & 0x0f);
if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1; return -1;
} }
else { else {

22
src/expand.h → src/expand_v1.h Executable file → Normal file
View File

@ -1,5 +1,5 @@
/* /*
* expand.h - block decompressor definitions * expand_v1.h - LZSA1 block decompressor definitions
* *
* Copyright (C) 2019 Emmanuel Marty * Copyright (C) 2019 Emmanuel Marty
* *
@ -20,11 +20,21 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
#ifndef _EXPAND_H /*
#define _EXPAND_H * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_V1_H
#define _EXPAND_V1_H
/** /**
* Decompress one data block * Decompress one LZSA1 data block
* *
* @param pInBlock pointer to compressed data * @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes * @param nInBlockSize size of compressed data, in bytes
@ -34,6 +44,6 @@
* *
* @return size of decompressed data in bytes, or -1 for error * @return size of decompressed data in bytes, or -1 for error
*/ */
int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_H */ #endif /* _EXPAND_V1_H */

330
src/expand_v2.c Normal file
View File

@ -0,0 +1,330 @@
/*
* expand_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_v2.h"
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles) {
unsigned int nValue;
if ((*nCurNibbles ^= 1) != 0) {
const unsigned char *pInBlock = *ppInBlock;
if (pInBlock >= pInBlockEnd) return -1;
(*nibbles) = *pInBlock++;
*ppInBlock = pInBlock;
}
nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
(*nibbles) <<= 4;
return nValue;
}
static inline FORCE_INLINE int lzsa_expand_literals_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, int *nCurNibbles, unsigned char *nibbles,
unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
if (nLiterals == LITERALS_RUN_LEN_V2) {
nLiterals += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
if (nLiterals == (LITERALS_RUN_LEN_V2 + 15)) {
if (pInBlock < pInBlockEnd) {
nLiterals = ((unsigned int)*pInBlock++);
if (nLiterals == 0) {
if ((pInBlock + 1) < pInBlockEnd) {
nLiterals = ((unsigned int)*pInBlock++);
nLiterals |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
if (nLiterals != 0) {
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
*ppInBlock = pInBlock;
*ppCurOutData = pCurOutData;
return 0;
}
static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, int *nCurNibbles, unsigned char *nibbles,
unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
nMatchLen += MIN_MATCH_SIZE_V2;
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
nMatchLen += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 + 15)) {
if (pInBlock < pInBlockEnd) {
nMatchLen = ((unsigned int)*pInBlock++);
if (nMatchLen == 0) {
if ((pInBlock + 1) < pInBlockEnd) {
nMatchLen = ((unsigned int)*pInBlock++);
nMatchLen |= (((unsigned int)*pInBlock++) << 8);
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 8);
memcpy(pCopyDst + 8, pCopySrc + 8, 8);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen >= 4) {
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
*pCurOutData++ = *pSrc++;
nMatchLen -= 4;
}
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
*ppInBlock = pInBlock;
*ppCurOutData = pCurOutData;
return 0;
}
/**
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
int nCurNibbles = 0;
unsigned char nibbles;
int nMatchOffset = 0;
/* Fast loop */
while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
if (nLiterals < LITERALS_RUN_LEN_V2) {
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
return -1;
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned char nOffsetMode = token & 0xc0;
switch (nOffsetMode) {
case 0x00:
/* 5 bit offset */
nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
nMatchOffset |= ((token & 0x20) >> 1);
nMatchOffset |= 0xffffffe0;
break;
case 0x40:
/* 9 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset |= 0xfffffe00;
break;
case 0x80:
/* 13 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
nMatchOffset |= 0xffffe000;
nMatchOffset -= 512;
break;
default:
/* Check if this is a 16 bit offset or a rep-match */
if ((token & 0x20) == 0) {
/* 16 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
nMatchOffset |= 0xffff0000;
}
break;
}
const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x07);
if (nMatchLen < MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 4);
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
}
else {
if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
}
else {
return -1;
}
}
}
/* Slow loop for the remainder of the buffer */
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
return -1;
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned char nOffsetMode = token & 0xc0;
switch (nOffsetMode) {
case 0x00:
/* 5 bit offset */
nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
nMatchOffset |= ((token & 0x20) >> 1);
nMatchOffset |= 0xffffffe0;
break;
case 0x40:
/* 9 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset |= 0xfffffe00;
break;
case 0x80:
/* 13 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
nMatchOffset |= 0xffffe000;
nMatchOffset -= 512;
break;
default:
/* Check if this is a 16 bit offset or a rep-match */
if ((token & 0x20) == 0) {
/* 16 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
nMatchOffset |= 0xffff0000;
}
break;
}
const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x07);
if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
else {
return -1;
}
}
}
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

49
src/expand_v2.h Normal file
View File

@ -0,0 +1,49 @@
/*
* expand_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_V2_H
#define _EXPAND_V2_H
/**
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V2_H */

View File

@ -20,13 +20,28 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FORMAT_H #ifndef _FORMAT_H
#define _FORMAT_H #define _FORMAT_H
#define MIN_MATCH_SIZE 3
#define MIN_OFFSET 1 #define MIN_OFFSET 1
#define MAX_OFFSET 0xffff #define MAX_OFFSET 0xffff
#define LITERALS_RUN_LEN 7
#define MATCH_RUN_LEN 15 #define MIN_MATCH_SIZE_V1 3
#define LITERALS_RUN_LEN_V1 7
#define MATCH_RUN_LEN_V1 15
#define MIN_MATCH_SIZE_V2 2
#define LITERALS_RUN_LEN_V2 3
#define MATCH_RUN_LEN_V2 7
#endif /* _FORMAT_H */ #endif /* _FORMAT_H */

View File

@ -20,9 +20,18 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h> #include <stdlib.h>
#include "frame.h" #include "frame.h"
#include "shrink.h"
#define LZSA_ID_0 0x7b #define LZSA_ID_0 0x7b
#define LZSA_ID_1 0x9e #define LZSA_ID_1 0x9e
@ -53,11 +62,11 @@ int lzsa_get_frame_size(void) {
* *
* @return number of encoded bytes, or -1 for failure * @return number of encoded bytes, or -1 for failure
*/ */
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize) { int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
if (nMaxFrameDataSize >= 3) { if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */ pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1; pFrameData[1] = LZSA_ID_1;
pFrameData[2] = 0; /* Format version 1 */ pFrameData[2] = (nFormatVersion == 2) ? 0x20 : 0; /* Format version 1 */
return 3; return 3;
} }
@ -139,14 +148,16 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
* *
* @return 0 for success, or -1 for failure * @return 0 for success, or -1 for failure
*/ */
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize) { int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) {
if (nFrameDataSize != 3 || if (nFrameDataSize != 3 ||
pFrameData[0] != LZSA_ID_0 || pFrameData[0] != LZSA_ID_0 ||
pFrameData[1] != LZSA_ID_1 || pFrameData[1] != LZSA_ID_1 ||
pFrameData[2] != 0) { (pFrameData[2] & 0x1f) != 0 ||
((pFrameData[2] & 0xe0) != 0x00 && (pFrameData[2] & 0xe0) != 0x20)) {
return -1; return -1;
} }
else { else {
*nFormatVersion = (pFrameData[2] & 0xe0) ? 2 : 1;
return 0; return 0;
} }
} }

View File

@ -20,6 +20,16 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _FRAME_H #ifndef _FRAME_H
#define _FRAME_H #define _FRAME_H
@ -45,7 +55,7 @@ int lzsa_get_frame_size(void);
* *
* @return number of encoded bytes, or -1 for failure * @return number of encoded bytes, or -1 for failure
*/ */
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize); int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
/** /**
* Encode compressed block frame header * Encode compressed block frame header
@ -87,7 +97,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
* *
* @return 0 for success, or -1 for failure * @return 0 for success, or -1 for failure
*/ */
int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize); int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion);
/** /**
* Decode frame header * Decode frame header

217
src/lib.c Executable file
View File

@ -0,0 +1,217 @@
/*
* lib.c - LZSA library implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "matchfinder.h"
#include "shrink_v1.h"
#include "shrink_v2.h"
#include "expand_v1.h"
#include "expand_v2.h"
#include "format.h"
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
int nResult;
int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->best_match = NULL;
pCompressor->slot_cost = NULL;
pCompressor->repmatch_opt = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
pCompressor->min_match_size = nMinMatchSizeForFormat;
else if (pCompressor->min_match_size > 5)
pCompressor->min_match_size = 5;
pCompressor->format_version = nFormatVersion;
pCompressor->flags = nFlags;
pCompressor->num_commands = 0;
if (!nResult) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
if (pCompressor->best_match) {
pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
if (pCompressor->slot_cost) {
pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
if (pCompressor->repmatch_opt)
return 0;
}
}
}
else {
return 0;
}
}
}
}
}
}
lzsa_compressor_destroy(pCompressor);
return 100;
}
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->repmatch_opt) {
free(pCompressor->repmatch_opt);
pCompressor->repmatch_opt = NULL;
}
if (pCompressor->slot_cost) {
free(pCompressor->slot_cost);
pCompressor->slot_cost = NULL;
}
if (pCompressor->best_match) {
free(pCompressor->best_match);
pCompressor->best_match = NULL;
}
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;
}
if (pCompressor->open_intervals) {
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
}
if (pCompressor->pos_data) {
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
if (pCompressor->intervals) {
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
}
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
return -1;
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
if (pCompressor->format_version == 1) {
return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
}
else if (pCompressor->format_version == 2) {
return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
}
else {
return -1;
}
}
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
return pCompressor->num_commands;
}
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
if (nFormatVersion == 1)
return lzsa_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else if (nFormatVersion == 2)
return lzsa_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else
return -1;
}

View File

@ -1,5 +1,5 @@
/* /*
* shrink.h - block compressor definitions * lib.h - LZSA library definitions
* *
* Copyright (C) 2019 Emmanuel Marty * Copyright (C) 2019 Emmanuel Marty
* *
@ -20,8 +20,18 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
#ifndef _SHRINK_H /*
#define _SHRINK_H * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _LIB_H
#define _LIB_H
#include "divsufsort.h" #include "divsufsort.h"
@ -29,17 +39,46 @@
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */ #define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */ #define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
/* Forward declarations */ #define LCP_BITS 15
typedef struct _lzsa_match lzsa_match; #define LCP_MAX (1<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1<<LCP_SHIFT) - 1)
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3
#define LEAVE_ALONE_MATCH_SIZE 1000
#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1
#define MODESWITCH_PENALTY 1
/** One match */
typedef struct _lzsa_match {
unsigned short length;
unsigned short offset;
} lzsa_match;
typedef struct _lzsa_repmatch_opt {
int incoming_offset;
short best_slot_for_incoming;
short expected_repmatch;
} lzsa_repmatch_opt;
/** Compression context */ /** Compression context */
typedef struct { typedef struct _lsza_compressor {
divsufsort_ctx_t divsufsort_context; divsufsort_ctx_t divsufsort_context;
unsigned int *intervals; unsigned int *intervals;
unsigned int *pos_data; unsigned int *pos_data;
unsigned int *open_intervals; unsigned int *open_intervals;
lzsa_match *match; lzsa_match *match;
lzsa_match *best_match;
int *slot_cost;
lzsa_repmatch_opt *repmatch_opt;
int min_match_size; int min_match_size;
int format_version;
int flags; int flags;
int num_commands; int num_commands;
} lsza_compressor; } lsza_compressor;
@ -54,7 +93,7 @@ typedef struct {
* *
* @return 0 for success, non-zero for failure * @return 0 for success, non-zero for failure
*/ */
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags); int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
/** /**
* Clean up compression context and free up any associated resources * Clean up compression context and free up any associated resources
@ -84,4 +123,17 @@ int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWind
*/ */
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor); int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
#endif /* _SHRINK_H */ /**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _LIB_H */

View File

@ -1,5 +1,5 @@
/* /*
* main.c - command line compression utility for the LZSA format * lzsa.c - command line compression utility for the LZSA format
* *
* Copyright (C) 2019 Emmanuel Marty * Copyright (C) 2019 Emmanuel Marty
* *
@ -20,6 +20,16 @@
* 3. This notice may not be removed or altered from any source distribution. * 3. This notice may not be removed or altered from any source distribution.
*/ */
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h> #include <stdio.h>
#include <stdbool.h> #include <stdbool.h>
#include <stdlib.h> #include <stdlib.h>
@ -31,17 +41,18 @@
#endif #endif
#include "format.h" #include "format.h"
#include "frame.h" #include "frame.h"
#include "shrink.h" #include "lib.h"
#include "expand.h"
#define BLOCK_SIZE 65536 #define BLOCK_SIZE 65536
#define OPT_VERBOSE 1 #define OPT_VERBOSE 1
#define OPT_RAW 2 #define OPT_RAW 2
#define OPT_FAVOR_RATIO 4 #define OPT_FAVOR_RATIO 4
#define TOOL_VERSION "0.6.0"
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
static long long lzsa_get_time() { static long long do_get_time() {
long long nTime; long long nTime;
#ifdef _WIN32 #ifdef _WIN32
@ -60,7 +71,7 @@ static long long lzsa_get_time() {
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) { static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
FILE *f_in, *f_out; FILE *f_in, *f_out;
unsigned char *pInData, *pOutData; unsigned char *pInData, *pOutData;
lsza_compressor compressor; lsza_compressor compressor;
@ -146,7 +157,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
nFlags |= LZSA_FLAG_FAVOR_RATIO; nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW) if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK; nFlags |= LZSA_FLAG_RAW_BLOCK;
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags); nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
if (nResult != 0) { if (nResult != 0) {
free(pOutData); free(pOutData);
pOutData = NULL; pOutData = NULL;
@ -165,7 +176,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
} }
if ((nOptions & OPT_RAW) == 0) { if ((nOptions & OPT_RAW) == 0) {
int nHeaderSize = lzsa_encode_header(cFrameData, 16); int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
if (nHeaderSize < 0) if (nHeaderSize < 0)
bError = true; bError = true;
else { else {
@ -175,7 +186,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
} }
if (nOptions & OPT_VERBOSE) { if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time(); nStartTime = do_get_time();
} }
int nPreviousBlockSize = 0; int nPreviousBlockSize = 0;
@ -280,7 +291,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
nCompressedSize += (long long)nFooterSize; nCompressedSize += (long long)nFooterSize;
if (!bError && (nOptions & OPT_VERBOSE)) { if (!bError && (nOptions & OPT_VERBOSE)) {
nEndTime = lzsa_get_time(); nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
@ -315,7 +326,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
/*---------------------------------------------------------------------------*/ /*---------------------------------------------------------------------------*/
static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL; long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL; long long nOriginalSize = 0LL;
unsigned int nFileSize = 0; unsigned int nFileSize = 0;
@ -338,7 +349,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
return 100; return 100;
} }
if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) { if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile); fclose(pInFile);
pInFile = NULL; pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n"); fprintf(stderr, "invalid magic number or format version in input file\n");
@ -423,7 +434,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
} }
if (nOptions & OPT_VERBOSE) { if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time(); nStartTime = do_get_time();
} }
int nDecompressionError = 0; int nDecompressionError = 0;
@ -476,7 +487,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
else { else {
unsigned int nBlockOffs = 0; unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) { if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize; nDecompressionError = nDecompressedSize;
break; break;
@ -518,7 +529,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
} }
else { else {
if (nOptions & OPT_VERBOSE) { if (nOptions & OPT_VERBOSE) {
nEndTime = lzsa_get_time(); nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n", fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n",
@ -529,7 +540,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
} }
} }
static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL; long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL; long long nOriginalSize = 0LL;
long long nKnownGoodSize = 0LL; long long nKnownGoodSize = 0LL;
@ -553,7 +564,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
return 100; return 100;
} }
if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) { if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile); fclose(pInFile);
pInFile = NULL; pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n"); fprintf(stderr, "invalid magic number or format version in input file\n");
@ -659,7 +670,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
} }
if (nOptions & OPT_VERBOSE) { if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time(); nStartTime = do_get_time();
} }
int nDecompressionError = 0; int nDecompressionError = 0;
@ -715,7 +726,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
else { else {
unsigned int nBlockOffs = 0; unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) { if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize; nDecompressionError = nDecompressedSize;
break; break;
@ -771,7 +782,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
} }
else { else {
if (nOptions & OPT_VERBOSE) { if (nOptions & OPT_VERBOSE) {
nEndTime = lzsa_get_time(); nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n", fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n",
@ -793,9 +804,11 @@ int main(int argc, char **argv) {
bool bCommandDefined = false; bool bCommandDefined = false;
bool bVerifyCompression = false; bool bVerifyCompression = false;
bool bMinMatchDefined = false; bool bMinMatchDefined = false;
bool bFormatVersionDefined = false;
char cCommand = 'z'; char cCommand = 'z';
int nMinMatchSize = MIN_MATCH_SIZE; int nMinMatchSize = 0;
unsigned int nOptions = OPT_FAVOR_RATIO; unsigned int nOptions = OPT_FAVOR_RATIO;
int nFormatVersion = 1;
for (i = 1; i < argc; i++) { for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-d")) { if (!strcmp(argv[i], "-d")) {
@ -840,7 +853,7 @@ int main(int argc, char **argv) {
if (!bMinMatchDefined && (i + 1) < argc) { if (!bMinMatchDefined && (i + 1) < argc) {
char *pEnd = NULL; char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10); nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10);
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) { if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
i++; i++;
bMinMatchDefined = true; bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO); nOptions &= (~OPT_FAVOR_RATIO);
@ -856,7 +869,7 @@ int main(int argc, char **argv) {
if (!bMinMatchDefined) { if (!bMinMatchDefined) {
char *pEnd = NULL; char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10); nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) { if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
bMinMatchDefined = true; bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO); nOptions &= (~OPT_FAVOR_RATIO);
} }
@ -869,7 +882,7 @@ int main(int argc, char **argv) {
} }
else if (!strcmp(argv[i], "--prefer-ratio")) { else if (!strcmp(argv[i], "--prefer-ratio")) {
if (!bMinMatchDefined) { if (!bMinMatchDefined) {
nMinMatchSize = MIN_MATCH_SIZE; nMinMatchSize = 0;
bMinMatchDefined = true; bMinMatchDefined = true;
} }
else else
@ -884,6 +897,35 @@ int main(int argc, char **argv) {
else else
bArgsError = true; bArgsError = true;
} }
else if (!strcmp(argv[i], "-f")) {
if (!bFormatVersionDefined && (i + 1) < argc) {
char *pEnd = NULL;
nFormatVersion = (int)strtol(argv[i + 1], &pEnd, 10);
if (pEnd && pEnd != argv[i + 1] && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
i++;
bFormatVersionDefined = true;
}
else {
bArgsError = true;
}
}
else
bArgsError = true;
}
else if (!strncmp(argv[i], "-f", 2)) {
if (!bFormatVersionDefined) {
char *pEnd = NULL;
nFormatVersion = (int)strtol(argv[i] + 2, &pEnd, 10);
if (pEnd && pEnd != (argv[i] + 2) && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
bFormatVersionDefined = true;
}
else {
bArgsError = true;
}
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-v")) { else if (!strcmp(argv[i], "-v")) {
if ((nOptions & OPT_VERBOSE) == 0) { if ((nOptions & OPT_VERBOSE) == 0) {
nOptions |= OPT_VERBOSE; nOptions |= OPT_VERBOSE;
@ -911,26 +953,28 @@ int main(int argc, char **argv) {
} }
if (bArgsError || !pszInFilename || !pszOutFilename) { if (bArgsError || !pszInFilename || !pszOutFilename) {
fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]); fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
fprintf(stderr, " -c: check resulting stream after compressing\n"); fprintf(stderr, " -c: check resulting stream after compressing\n");
fprintf(stderr, " -d: decompress (default: compress)\n"); fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -v: be verbose\n"); fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -f <value>: LZSA compression format (1-2)\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n"); fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
fprintf(stderr, " -D <filename>: use dictionary file\n"); fprintf(stderr, " -D <filename>: use dictionary file\n");
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n"); fprintf(stderr, " -m <value>: minimum match size (3-5) (default: 3)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n"); fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n"); fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
return 100; return 100;
} }
if (cCommand == 'z') { if (cCommand == 'z') {
int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize); int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
if (nResult == 0 && bVerifyCompression) { if (nResult == 0 && bVerifyCompression) {
nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions); nResult = do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
} }
} }
else if (cCommand == 'd') { else if (cCommand == 'd') {
return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions); return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
} }
else { else {
return 100; return 100;

294
src/matchfinder.c Normal file
View File

@ -0,0 +1,294 @@
/*
* matchfinder.c - LZ match finder implementation
*
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
* see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "matchfinder.h"
#include "format.h"
#include "lib.h"
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i;
/* Compute the permuted LCP first (Kärkkäinen method) */
Phi[intervals[0]] = -1;
for (i = 1; i < nInWindowSize; i++)
Phi[intervals[i]] = intervals[i - 1];
for (i = 0; i < nInWindowSize; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
nCurLen--;
}
/* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
if (i < nInWindowSize)
intervals[i] &= POS_MASK;
/**
* Build intervals for finding matches
*
* Methodology and code fragment taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
unsigned int * const SA_and_LCP = intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int next_interval_idx;
unsigned int *top = pCompressor->open_intervals;
unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
*top = 0;
intervals[0] = 0;
next_interval_idx = 1;
for (int r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
pos_data[prev_pos] = *top;
}
else if (next_lcp > top_lcp) {
/* Opening a new interval */
*++top = next_lcp | next_interval_idx++;
pos_data[prev_pos] = *top;
}
else {
/* Closing the deepest open interval */
pos_data[prev_pos] = *top;
for (;;) {
const unsigned int closed_interval_idx = *top-- & POS_MASK;
const unsigned int superinterval_lcp = *top & LCP_MASK;
if (next_lcp == superinterval_lcp) {
/* Continuing the superinterval */
intervals[closed_interval_idx] = *top;
break;
}
else if (next_lcp > superinterval_lcp) {
/* Creating a new interval that is a
* superinterval of the one being
* closed, but still a subinterval of
* its superinterval */
*++top = next_lcp | next_interval_idx++;
intervals[closed_interval_idx] = *top;
break;
}
else {
/* Also closing the superinterval */
intervals[closed_interval_idx] = *top;
}
}
}
prev_pos = next_pos;
}
/* Close any still-open intervals. */
pos_data[prev_pos] = *top;
for (; top > pCompressor->open_intervals; top--)
intervals[*top & POS_MASK] = *(top - 1);
/* Success */
return 0;
}
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
*
* @return number of matches
*/
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
/**
* Find matches using intervals
*
* Taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[nOffset];
pos_data[nOffset] = 0;
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root;
* (3) an interval last visited by suffix 0
*
* We could avoid the ambiguity with (3) by using an lcp
* placeholder value other than 0 to represent "visited", but
* it's fastest to use 0. So we just don't allow matches with
* position 0. */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref;
matchptr = pMatches;
for (;;) {
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK];
intervals[ref & POS_MASK] = nOffset;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
if (super_ref == 0)
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK];
}
return (int)(matchptr - pMatches);
}
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match match;
int i;
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0);
}
}
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
pMatch->length = 0;
pMatch->offset = 0;
}
else {
int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
if (nMaxLen < 0)
nMaxLen = 0;
if (pMatch->length > nMaxLen)
pMatch->length = (unsigned short)nMaxLen;
}
pMatch++;
}
}
}

82
src/matchfinder.h Normal file
View File

@ -0,0 +1,82 @@
/*
* matchfinder.h - LZ match finder definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lsza_compressor lsza_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
*
* @return number of matches
*/
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
#endif /* _MATCHFINDER_H */

View File

@ -1,830 +0,0 @@
/*
* shrink.c - block compressor implementation
*
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
* and related and neighboring rights to this software to the public domain
* worldwide via the Creative Commons Zero 1.0 Universal Public Domain
* Dedication (the "CC0").
*
* This software is distributed in the hope that it will be useful, but WITHOUT
* ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
* FOR A PARTICULAR PURPOSE. See the CC0 for more details.
*
* You should have received a copy of the CC0 along with this software; if not
* see <http://creativecommons.org/publicdomain/zero/1.0/>.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "shrink.h"
#include "format.h"
#define LCP_BITS 15
#define LCP_MAX (1<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1<<LCP_SHIFT) - 1)
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3
#define LEAVE_ALONE_MATCH_SIZE 1000
#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1
#define MODESWITCH_PENALTY 1
/** One match */
typedef struct _lzsa_match {
unsigned short length;
unsigned short offset;
} lzsa_match;
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags) {
int nResult;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->min_match_size = nMinMatchSize;
if (pCompressor->min_match_size < MIN_MATCH_SIZE)
pCompressor->min_match_size = MIN_MATCH_SIZE;
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
pCompressor->flags = nFlags;
pCompressor->num_commands = 0;
if (!nResult) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->match)
return 0;
}
}
}
}
lzsa_compressor_destroy(pCompressor);
return 100;
}
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;
}
if (pCompressor->open_intervals) {
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
}
if (pCompressor->pos_data) {
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
if (pCompressor->intervals) {
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
}
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
*
* @return 0 for success, non-zero for failure
*/
static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
int *Phi = PLCP;
int nCurLen = 0;
int i;
/* Compute the permuted LCP first (Kärkkäinen method) */
Phi[intervals[0]] = -1;
for (i = 1; i < nInWindowSize; i++)
Phi[intervals[i]] = intervals[i - 1];
for (i = 0; i < nInWindowSize; i++) {
if (Phi[i] == -1) {
PLCP[i] = 0;
continue;
}
int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
PLCP[i] = nCurLen;
if (nCurLen > 0)
nCurLen--;
}
/* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
* and the interval builder below doesn't need it either. */
intervals[0] &= POS_MASK;
int nMinMatchSize = pCompressor->min_match_size;
for (i = 1; i < nInWindowSize - 1; i++) {
int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize)
nLen = 0;
if (nLen > LCP_MAX)
nLen = LCP_MAX;
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
}
if (i < nInWindowSize)
intervals[i] &= POS_MASK;
/**
* Build intervals for finding matches
*
* Methodology and code fragment taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
unsigned int * const SA_and_LCP = intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int next_interval_idx;
unsigned int *top = pCompressor->open_intervals;
unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
*top = 0;
intervals[0] = 0;
next_interval_idx = 1;
for (int r = 1; r < nInWindowSize; r++) {
const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
const unsigned int top_lcp = *top & LCP_MASK;
if (next_lcp == top_lcp) {
/* Continuing the deepest open interval */
pos_data[prev_pos] = *top;
}
else if (next_lcp > top_lcp) {
/* Opening a new interval */
*++top = next_lcp | next_interval_idx++;
pos_data[prev_pos] = *top;
}
else {
/* Closing the deepest open interval */
pos_data[prev_pos] = *top;
for (;;) {
const unsigned int closed_interval_idx = *top-- & POS_MASK;
const unsigned int superinterval_lcp = *top & LCP_MASK;
if (next_lcp == superinterval_lcp) {
/* Continuing the superinterval */
intervals[closed_interval_idx] = *top;
break;
}
else if (next_lcp > superinterval_lcp) {
/* Creating a new interval that is a
* superinterval of the one being
* closed, but still a subinterval of
* its superinterval */
*++top = next_lcp | next_interval_idx++;
intervals[closed_interval_idx] = *top;
break;
}
else {
/* Also closing the superinterval */
intervals[closed_interval_idx] = *top;
}
}
}
prev_pos = next_pos;
}
/* Close any still-open intervals. */
pos_data[prev_pos] = *top;
for (; top > pCompressor->open_intervals; top--)
intervals[*top & POS_MASK] = *(top - 1);
/* Success */
return 0;
}
/**
* Find matches at the specified offset in the input window
*
* @param pCompressor compression context
* @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none)
*
* @return number of matches
*/
static int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
unsigned int super_ref;
unsigned int match_pos;
lzsa_match *matchptr;
/**
* Find matches using intervals
*
* Taken from wimlib (CC0 license):
* https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
*/
/* Get the deepest lcp-interval containing the current suffix. */
ref = pos_data[nOffset];
pos_data[nOffset] = 0;
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root;
* (3) an interval last visited by suffix 0
*
* We could avoid the ambiguity with (3) by using an lcp
* placeholder value other than 0 to represent "visited", but
* it's fastest to use 0. So we just don't allow matches with
* position 0. */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref;
matchptr = pMatches;
for (;;) {
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK];
intervals[ref & POS_MASK] = nOffset;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
if (super_ref == 0)
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK];
}
return (int)(matchptr - pMatches);
}
/**
* Skip previously compressed bytes
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
static void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match match;
int i;
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0);
}
}
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
* the optimizer to look at.
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
int i;
for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
pMatch->length = 0;
pMatch->offset = 0;
}
else {
int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
if (nMaxLen < 0)
nMaxLen = 0;
if (pMatch->length > nMaxLen)
pMatch->length = (unsigned short)nMaxLen;
}
pMatch++;
}
}
}
/**
* Get the number of extra bits required to represent a literals length
*
* @param nLength literals length
*
* @return number of extra bits required
*/
static inline int lzsa_get_literals_varlen_size(const int nLength) {
if (nLength < LITERALS_RUN_LEN) {
return 0;
}
else {
if (nLength < 256)
return 8;
else {
if (nLength < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= LITERALS_RUN_LEN) {
if (nLength < 256)
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
else {
if (nLength < 512) {
pOutData[nOutOffset++] = 250;
pOutData[nOutOffset++] = nLength - 256;
}
else {
pOutData[nOutOffset++] = 249;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Get the number of extra bits required to represent an encoded match length
*
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
*
* @return number of extra bits required
*/
static inline int lzsa_get_match_varlen_size(const int nLength) {
if (nLength < MATCH_RUN_LEN) {
return 0;
}
else {
if ((nLength + MIN_MATCH_SIZE) < 256)
return 8;
else {
if ((nLength + MIN_MATCH_SIZE) < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
*/
static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= MATCH_RUN_LEN) {
if ((nLength + MIN_MATCH_SIZE) < 256)
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
else {
if ((nLength + MIN_MATCH_SIZE) < 512) {
pOutData[nOutOffset++] = 239;
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE - 256;
}
else {
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE) & 0xff;
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE) >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
cost[nEndOffset - 1] = 8;
nLastLiteralsOffset = nEndOffset;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nBestCost, nBestMatchLen, nBestMatchOffset;
int nLiteralsLen = nLastLiteralsOffset - i;
nBestCost = 8 + cost[i + 1];
if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == 256 || nLiteralsLen == 512) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nBestCost += 8;
}
if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nBestCost += MODESWITCH_PENALTY;
nBestMatchLen = 0;
nBestMatchOffset = 0;
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
int nCurCost;
int nMatchLen = pMatch[m].length;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE);
nCurCost += cost[i + nMatchLen];
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
}
else {
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN)
nMatchRunLen = MATCH_RUN_LEN;
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
for (; k <= nMatchLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE);
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
}
}
if (nBestMatchLen >= MIN_MATCH_SIZE)
nLastLiteralsOffset = i;
cost[i] = nBestCost;
pMatch->length = nBestMatchLen;
pMatch->offset = nBestMatchOffset;
}
}
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE) {
int nMatchLen = pMatch->length;
int nReduce = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) {
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE);
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
}
else {
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) {
/* Join */
pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
continue;
}
nNumLiterals = 0;
i += nMatchLen;
}
}
else {
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? MATCH_RUN_LEN : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
if (nTokenLongOffset) {
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
i += nMatchLen;
pCompressor->num_commands++;
}
else {
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
return -1;
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
lzsa_optimize_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
return lzsa_write_block(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
return pCompressor->num_commands;
}

460
src/shrink_v1.c Normal file
View File

@ -0,0 +1,460 @@
/*
* shrink_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_v1.h"
#include "format.h"
/**
* Get the number of extra bits required to represent a literals length
*
* @param nLength literals length
*
* @return number of extra bits required
*/
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
if (nLength < LITERALS_RUN_LEN_V1) {
return 0;
}
else {
if (nLength < 256)
return 8;
else {
if (nLength < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= LITERALS_RUN_LEN_V1) {
if (nLength < 256)
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
else {
if (nLength < 512) {
pOutData[nOutOffset++] = 250;
pOutData[nOutOffset++] = nLength - 256;
}
else {
pOutData[nOutOffset++] = 249;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Get the number of extra bits required to represent an encoded match length
*
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*
* @return number of extra bits required
*/
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
if (nLength < MATCH_RUN_LEN_V1) {
return 0;
}
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
return 8;
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 512)
return 16;
else
return 24;
}
}
}
/**
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
*/
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
if (nLength >= MATCH_RUN_LEN_V1) {
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
else {
if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
pOutData[nOutOffset++] = 239;
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256;
}
else {
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_optimize_matches_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
cost[nEndOffset - 1] = 8;
nLastLiteralsOffset = nEndOffset;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nBestCost, nBestMatchLen, nBestMatchOffset;
int nLiteralsLen = nLastLiteralsOffset - i;
nBestCost = 8 + cost[i + 1];
if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nBestCost += 8;
}
if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nBestCost += MODESWITCH_PENALTY;
nBestMatchLen = 0;
nBestMatchOffset = 0;
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
int nCurCost;
int nMatchLen = pMatch[m].length;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + nMatchLen];
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
}
else {
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V1)
nMatchRunLen = MATCH_RUN_LEN_V1;
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
for (; k <= nMatchLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
}
}
if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
nLastLiteralsOffset = i;
cost[i] = nBestCost;
pMatch->length = nBestMatchLen;
pMatch->offset = nBestMatchOffset;
}
}
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchLen = pMatch->length;
int nReduce = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
}
else {
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) {
/* Join */
pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
continue;
}
nNumLiterals = 0;
i += nMatchLen;
}
}
else {
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
if (nTokenLongOffset) {
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
i += nMatchLen;
pCompressor->num_commands++;
}
else {
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}

53
src/shrink_v1.h Normal file
View File

@ -0,0 +1,53 @@
/*
* shrink_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_V1_H
#define _SHRINK_V1_H
/* Forward declarations */
typedef struct _lsza_compressor lsza_compressor;
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_V1_H */

733
src/shrink_v2.c Normal file
View File

@ -0,0 +1,733 @@
/*
* shrink_v2.c - LZSA2 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_v2.h"
#include "format.h"
/**
* Write 4-bit nibble to output (compressed) buffer
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
* @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles
* @param nCurFreeNibbles current number of free nibbles in byte
* @param nNibbleValue value to write (0..15)
*/
static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) {
if (nOutOffset < 0) return -1;
if ((*nCurNibbleOffset) == -1) {
if (nOutOffset >= nMaxOutDataSize) return -1;
(*nCurNibbleOffset) = nOutOffset;
(*nCurFreeNibbles) = 2;
pOutData[nOutOffset++] = 0;
}
pOutData[*nCurNibbleOffset] = (pOutData[*nCurNibbleOffset] << 4) | (nNibbleValue & 0x0f);
(*nCurFreeNibbles)--;
if ((*nCurFreeNibbles) == 0) {
(*nCurNibbleOffset) = -1;
}
return nOutOffset;
}
/**
* Get the number of extra bits required to represent a literals length
*
* @param nLength literals length
*
* @return number of extra bits required
*/
static inline int lzsa_get_literals_varlen_size_v2(const int nLength) {
if (nLength < LITERALS_RUN_LEN_V2) {
return 0;
}
else {
if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
return 4;
}
else {
if (nLength < 256)
return 4+8;
else {
return 4+24;
}
}
}
}
/**
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength literals length
*/
static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
if (nLength >= LITERALS_RUN_LEN_V2) {
if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2);
}
else {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
if (nOutOffset < 0) return -1;
if (nLength < 256)
pOutData[nOutOffset++] = nLength;
else {
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = nLength & 0xff;
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Get the number of extra bits required to represent an encoded match length
*
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
*
* @return number of extra bits required
*/
static inline int lzsa_get_match_varlen_size_v2(const int nLength) {
if (nLength < MATCH_RUN_LEN_V2) {
return 0;
}
else {
if (nLength < (MATCH_RUN_LEN_V2 + 15))
return 4;
else {
if ((nLength + MIN_MATCH_SIZE_V2) < 256)
return 4+8;
else {
return 4 + 24;
}
}
}
}
/**
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
* room to write the bytes.
*
* @param pOutData pointer to output buffer
* @param nOutOffset current write index into output buffer
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
*/
static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
if (nLength >= MATCH_RUN_LEN_V2) {
if (nLength < (MATCH_RUN_LEN_V2 + 15)) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2);
}
else {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
if (nOutOffset < 0) return -1;
if ((nLength + MIN_MATCH_SIZE_V2) < 256)
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2;
else {
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff;
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff;
}
}
}
return nOutOffset;
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*/
static void lzsa_optimize_matches_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int *prev_match = (int*)pCompressor->intervals; /* Reuse */
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
lzsa_match *pBestMatch = pCompressor->best_match;
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
cost[nEndOffset - 1] = 8;
prev_match[nEndOffset - 1] = nEndOffset;
nLastLiteralsOffset = nEndOffset;
pCompressor->best_match[nEndOffset - 1].length = 0;
pCompressor->best_match[nEndOffset - 1].offset = 0;
repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
repmatch_opt[nEndOffset - 1].incoming_offset = -1;
repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nLiteralsCost;
int nLiteralsLen = nLastLiteralsOffset - i;
nLiteralsCost = 8 + cost[i + 1];
if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nLiteralsCost += 4;
}
else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nLiteralsCost += 8;
}
else if (nLiteralsLen == 256) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nLiteralsCost += 16;
}
if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
nLiteralsCost += MODESWITCH_PENALTY;
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
cost[i] = nLiteralsCost;
pCompressor->best_match[i].length = 0;
pCompressor->best_match[i].offset = 0;
repmatch_opt[i].best_slot_for_incoming = -1;
repmatch_opt[i].incoming_offset = -1;
repmatch_opt[i].expected_repmatch = 0;
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
nBestCost = nLiteralsCost;
nBestMatchLen = 0;
nBestMatchOffset = 0;
nBestUpdatedSlot = -1;
nBestUpdatedIndex = -1;
nBestExpectedRepMatch = 0;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
int nCurCost;
int nMatchLen = pMatch[m].length;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
int nCurIndex = prev_match[i + nMatchLen];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
nCurCost += cost[i + nMatchLen];
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = -1;
nBestUpdatedIndex = -1;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
else {
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V2)
nMatchRunLen = MATCH_RUN_LEN_V2;
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
int nCurIndex = prev_match[i + k];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
int nCurUpdatedSlot = -1;
int nCurUpdatedIndex = -1;
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
int r;
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
if (nAltCost <= nCurCost) {
nCurUpdatedSlot = r;
nCurUpdatedIndex = nCurIndex;
nCurCost = nAltCost;
nCurExpectedRepMatch = 2;
}
}
}
}
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = nCurUpdatedSlot;
nBestUpdatedIndex = nCurUpdatedIndex;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
for (; k <= nMatchLen; k++) {
int nCurCost;
int nCurIndex = prev_match[i + k];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
nCurCost += cost[i + k];
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
int nCurUpdatedSlot = -1;
int nCurUpdatedIndex = -1;
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
int r;
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
if (nAltCost <= nCurCost) {
nCurUpdatedSlot = r;
nCurUpdatedIndex = nCurIndex;
nCurCost = nAltCost;
nCurExpectedRepMatch = 2;
}
}
}
}
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = nCurUpdatedSlot;
nBestUpdatedIndex = nCurUpdatedIndex;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
}
pSlotCost[m] = nBestCost;
pMatch[m].length = nBestMatchLen;
pMatch[m].offset = nBestMatchOffset; /* not necessary */
if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
cost[i] = nBestCost;
pCompressor->best_match[i].length = nBestMatchLen;
pCompressor->best_match[i].offset = nBestMatchOffset;
repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
}
}
}
for (; m < NMATCHES_PER_OFFSET; m++) {
pSlotCost[m] = 0;
}
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
nLastLiteralsOffset = i;
prev_match[i] = nLastLiteralsOffset;
}
int nIncomingOffset = -1;
for (i = nStartOffset; i < nEndOffset; ) {
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
pCompressor->best_match[i].length = pMatch->length;
pCompressor->best_match[i].offset = pMatch->offset;
cost[i] = *pSlotCost;
if (repmatch_opt[i].expected_repmatch == 2)
repmatch_opt[i].expected_repmatch = 1;
}
else {
if (repmatch_opt[i].expected_repmatch == 2)
repmatch_opt[i].expected_repmatch = 0;
}
nIncomingOffset = i;
i += pCompressor->best_match[i].length;
}
else {
i++;
}
}
}
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
int nNumLiterals = 0;
int nDidReduce = 0;
int nPreviousMatchOffset = -1;
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->best_match + i;
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
int nMatchLen = pMatch->length;
int nReduce = 0;
int nCurrentMatchOffset = i;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16;
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2);
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->best_match[i + j].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
if (nPreviousMatchOffset >= 0) {
repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
nPreviousMatchOffset = -1;
}
}
else {
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_OFFSET) {
/* Join */
pMatch->length += pCompressor->best_match[i + nMatchLen].length;
pCompressor->best_match[i + nMatchLen].offset = 0;
pCompressor->best_match[i + nMatchLen].length = -1;
continue;
}
nNumLiterals = 0;
i += nMatchLen;
}
nPreviousMatchOffset = nCurrentMatchOffset;
}
else {
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
* Emit block of compressed data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
static int lzsa_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
int nRepMatchOffset = 0;
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->best_match + i;
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen;
int nTokenOffsetMode;
int nOffsetSize;
if (nMatchOffset == nRepMatchOffset) {
nTokenOffsetMode = 0xe0;
nOffsetSize = 0;
}
else {
if (nMatchOffset <= 32) {
nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
nOffsetSize = 4;
}
else if (nMatchOffset <= 512) {
nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
nOffsetSize = 8;
}
else if (nMatchOffset <= (8192 + 512)) {
nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
nOffsetSize = 12;
}
else {
nTokenOffsetMode = 0xc0;
nOffsetSize = 16;
}
}
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
if (nOutOffset < 0) return -1;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
if (nOutOffset < 0) return -1;
}
else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
}
else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
if (nOutOffset < 0) return -1;
}
else if (nTokenOffsetMode == 0xc0) {
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nRepMatchOffset = nMatchOffset;
nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
if (nOutOffset < 0) return -1;
i += nMatchLen;
pCompressor->num_commands++;
}
else {
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
return -1;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00;
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
if (nOutOffset < 0) return -1;
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if (nOutOffset >= nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0; /* Match offset */
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15); /* Extended match length nibble */
if (nOutOffset < 0) return -1;
if ((nOutOffset + 3) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0; /* Extended match length byte */
pOutData[nOutOffset++] = 0; /* 16-bit match length */
pOutData[nOutOffset++] = 0;
}
if (nCurNibbleOffset != -1) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0);
if (nOutOffset < 0 || nCurNibbleOffset != -1)
return -1;
}
return nOutOffset;
}
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
int nDidReduce;
int nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nPasses++;
} while (nDidReduce && nPasses < 20);
return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
}

53
src/shrink_v2.h Normal file
View File

@ -0,0 +1,53 @@
/*
* shrink_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_V2_H
#define _SHRINK_V2_H
/* Forward declarations */
typedef struct _lsza_compressor lsza_compressor;
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_V2_H */