diff --git a/LICENSE b/LICENSE
index 32f668c..29b28c1 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,3 +1,3 @@
-The LZSA code is available under the Zlib license, except for src/shrink.c which is placed under the Creative Commons CC0 license.
+The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license.
Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information.
diff --git a/Makefile b/Makefile
index c04753b..a9d3601 100755
--- a/Makefile
+++ b/Makefile
@@ -10,10 +10,14 @@ $(OBJDIR)/%.o: src/../%.c
APP := lzsa
-OBJS := $(OBJDIR)/src/main.o
+OBJS := $(OBJDIR)/src/lzsa.o
OBJS += $(OBJDIR)/src/frame.o
-OBJS += $(OBJDIR)/src/shrink.o
-OBJS += $(OBJDIR)/src/expand.o
+OBJS += $(OBJDIR)/src/lib.o
+OBJS += $(OBJDIR)/src/matchfinder.o
+OBJS += $(OBJDIR)/src/shrink_v1.o
+OBJS += $(OBJDIR)/src/shrink_v2.o
+OBJS += $(OBJDIR)/src/expand_v1.o
+OBJS += $(OBJDIR)/src/expand_v2.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
diff --git a/README.md b/README.md
index 08abb66..790612a 100755
--- a/README.md
+++ b/README.md
@@ -40,7 +40,7 @@ Inspirations:
License:
* The LZSA code is available under the Zlib license.
-* The compressor (shrink.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder.
+* The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Biggers' Wimlib in the suffix array-based matchfinder.
# Stream format
diff --git a/VS2017/lzsa.vcxproj b/VS2017/lzsa.vcxproj
index 8461495..89325d5 100755
--- a/VS2017/lzsa.vcxproj
+++ b/VS2017/lzsa.vcxproj
@@ -177,24 +177,32 @@
-
+
+
+
-
+
+
+
-
+
+
+
-
-
+
+
+
+
diff --git a/VS2017/lzsa.vcxproj.filters b/VS2017/lzsa.vcxproj.filters
index 6767357..511e8d4 100755
--- a/VS2017/lzsa.vcxproj.filters
+++ b/VS2017/lzsa.vcxproj.filters
@@ -27,15 +27,9 @@
Fichiers d%27en-tĂȘte
-
- Fichiers sources
-
Fichiers sources
-
- Fichiers sources
-
Fichiers sources\libdivsufsort\include
@@ -48,17 +42,26 @@
Fichiers sources
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
-
- Fichiers sources
-
-
- Fichiers sources
-
-
- Fichiers sources
-
Fichiers sources\libdivsufsort\lib
@@ -74,5 +77,26 @@
Fichiers sources
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
+
+ Fichiers sources
+
\ No newline at end of file
diff --git a/asm/6502/decompress.asm b/asm/6502/decompress_v1.asm
similarity index 95%
rename from asm/6502/decompress.asm
rename to asm/6502/decompress_v1.asm
index f359d15..9289d32 100755
--- a/asm/6502/decompress.asm
+++ b/asm/6502/decompress_v1.asm
@@ -1,5 +1,5 @@
; -----------------------------------------------------------------------------
-; Decompress raw LZSA block. Create one with lzsa -r
+; Decompress raw LZSA1 block. Create one with lzsa -r
;
; in:
; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
@@ -31,7 +31,7 @@
OFFSLO = $43 ; zero-page location for temp offset
OFFSHI = $44
-DECOMPRESS_LZSA
+DECOMPRESS_LZSA1
LDY #$00
DECODE_TOKEN
diff --git a/asm/6502/decompress_v2.asm b/asm/6502/decompress_v2.asm
new file mode 100755
index 0000000..7c950ae
--- /dev/null
+++ b/asm/6502/decompress_v2.asm
@@ -0,0 +1,245 @@
+; -----------------------------------------------------------------------------
+; Decompress raw LZSA2 block.
+; Create one with lzsa -r -f2
+;
+; in:
+; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address
+; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address
+;
+; out:
+; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1
+; -----------------------------------------------------------------------------
+;
+; Copyright (C) 2019 Emmanuel Marty
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+; -----------------------------------------------------------------------------
+
+OFFSLO = $43 ; zero-page location for temp offset
+OFFSHI = $44
+FIXUP = $4B
+NIBBLES = $FB
+NIBCOUNT = $FC
+
+DECOMPRESS_LZSA2
+ LDY #$00
+ STY NIBBLES
+ STY NIBCOUNT
+
+DECODE_TOKEN
+ JSR GETSRC ; read token byte: XYZ|LL|MMM
+ PHA ; preserve token on stack
+
+ AND #$18 ; isolate literals count (LL)
+ BEQ NO_LITERALS ; skip if no literals to copy
+ CMP #$18 ; LITERALS_RUN_LEN_V2 << 3?
+ BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token
+
+ JSR GETNIBBLE ; get extra literals length nibble
+ CLC ; add nibble to len from token
+ ADC #$03 ; (LITERALS_RUN_LEN_V2)
+ CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
+ BNE PREPARE_COPY_LITERALS ; if less, literals count is complete
+
+ JSR GETSRC ; get extra byte of variable literals count
+ TAX ; non-zero?
+ BNE PREPARE_COPY_LITERALS_HIGH ; if so, literals count is complete
+
+ ; handle 16 bits literals count
+ ; literals count = directly these 16 bits
+ JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
+ TAY ; put high 8 bits in Y
+ JMP PREPARE_COPY_LITERALS_HIGH
+
+EMBEDDED_LITERALS
+ LSR A ; shift literals count into place
+ LSR A
+ LSR A
+
+PREPARE_COPY_LITERALS
+ TAX
+PREPARE_COPY_LITERALS_HIGH
+ INY
+
+COPY_LITERALS
+ JSR GETPUT ; copy one byte of literals
+ DEX
+ BNE COPY_LITERALS
+ DEY
+ BNE COPY_LITERALS
+
+NO_LITERALS
+ PLA ; retrieve token from stack
+ PHA ; preserve token again
+ BMI REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset
+
+ ASL ; 0YZ: 5 or 9 bit offset
+ BMI OFFSET_9_BIT
+
+ ; 00Z: 5 bit offset
+ LSR A ; Shift Z (offset bit 4) in place
+ LSR A
+ AND #$10
+ STA FIXUP
+
+ JSR GETNIBBLE ; get nibble for offset bits 0-3
+ ORA FIXUP ; merge offset bit 4
+ ORA #$E0 ; set offset bits 7-5 to 1
+ TAX ; store low byte of match offset
+ LDA #$0FF ; set offset bits 15-8 to 1
+ BNE GOT_OFFSET ; (*same as JMP GOT_OFFSET but shorter)
+
+OFFSET_9_BIT ; 01Z: 9 bit offset
+ ASL ; shift Z (offset bit 8) in place
+ ROL
+ ROL
+ ORA #$FE ; set offset bits 15-9 to 1
+ STA OFFSHI
+
+ JSR GETSRC ; get offset bits 0-7 from stream in A
+ TAX ; store low byte of match offset
+ JMP GOT_OFFSET_LO ; go prepare match
+
+REPMATCH_OR_LARGE_OFFSET
+ ASL ; 13 bit offset?
+ BMI REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not
+
+ ; 10Z: 13 bit offset
+
+ LSR A ; shift Z (offset bit 4) in place
+ LSR A
+ AND #$10
+ STA FIXUP
+
+ JSR GETSRC ; get offset bits 0-7 from stream in A
+ TAX ; store low byte of match offset
+
+ JSR GETNIBBLE ; get nibble for offset bits 8-11
+ ORA FIXUP ; merge offset bit 12
+ CLC
+ ADC #$DE ; set bits 13-15 to 1 and subtract 2 (to subtract 512)
+ BNE GOT_OFFSET ; go prepare match (*same as JMP GOT_OFFSET but shorter)
+
+REPMATCH_OR_16_BIT ; rep-match or 16 bit offset
+ ASL ; XYZ=111?
+ BMI REP_MATCH ; reuse previous offset if so (rep-match)
+
+ ; 110: handle 16 bit offset
+ JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
+
+GOT_OFFSET
+ STA OFFSHI ; store final match offset
+GOT_OFFSET_LO
+ STX OFFSLO
+
+REP_MATCH
+ CLC ; add dest + match offset
+ LDA PUTDST+1 ; low 8 bits
+ ADC OFFSLO
+ STA COPY_MATCH_LOOP+1 ; store back reference address
+ LDA OFFSHI ; high 8 bits
+ ADC PUTDST+2
+ STA COPY_MATCH_LOOP+2 ; store high 8 bits of address
+
+ PLA ; retrieve token from stack again
+ AND #$07 ; isolate match len (MMM)
+ CLC
+ ADC #$02 ; add MIN_MATCH_SIZE_V2
+ CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
+ BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token
+
+ JSR GETNIBBLE ; get extra match length nibble
+ CLC ; add nibble to len from token
+ ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2)
+ CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
+ BNE PREPARE_COPY_MATCH ; if less, match length is complete
+
+ JSR GETSRC ; get extra byte of variable match length
+ TAX ; non-zero?
+ BNE PREPARE_COPY_MATCH_Y ; if so, the match length is complete
+
+ ; Handle 16 bits match length
+ JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A
+ TAY ; put high 8 bits in Y
+ ; large match length with zero high byte?
+ BEQ DECOMPRESSION_DONE ; if so, this is the EOD code, bail
+ TXA
+
+PREPARE_COPY_MATCH
+ TAX
+PREPARE_COPY_MATCH_Y
+ INY
+
+COPY_MATCH_LOOP
+ LDA $AAAA ; get one byte of backreference
+ INC COPY_MATCH_LOOP+1
+ BNE GETMATCH_DONE
+ INC COPY_MATCH_LOOP+2
+GETMATCH_DONE
+ JSR PUTDST ; copy to destination
+ DEX
+ BNE COPY_MATCH_LOOP
+ DEY
+ BNE COPY_MATCH_LOOP
+ JMP DECODE_TOKEN
+
+GETNIBBLE
+ DEC NIBCOUNT
+ BPL HAS_NIBBLES
+
+ LDA #$01
+ STA NIBCOUNT
+ JSR GETSRC ; get 2 nibbles
+ STA NIBBLES
+ LSR A
+ LSR A
+ LSR A
+ LSR A
+ RTS
+
+HAS_NIBBLES
+ LDA NIBBLES
+ AND #$0F ; isolate low 4 bits of nibble
+ RTS
+
+GETPUT
+ JSR GETSRC
+PUTDST
+LZSA_DST_LO = *+1
+LZSA_DST_HI = *+2
+ STA $AAAA
+ INC PUTDST+1
+ BNE PUTDST_DONE
+ INC PUTDST+2
+PUTDST_DONE
+DECOMPRESSION_DONE
+ RTS
+
+GETLARGESRC
+ JSR GETSRC ; grab low 8 bits
+ TAX ; move to X
+ ; fall through grab high 8 bits
+
+GETSRC
+LZSA_SRC_LO = *+1
+LZSA_SRC_HI = *+2
+ LDA $AAAA
+ INC GETSRC+1
+ BNE GETSRC_DONE
+ INC GETSRC+2
+GETSRC_DONE
+ RTS
diff --git a/asm/8088/decompress_small.S b/asm/8088/decompress_small_v1.S
similarity index 95%
rename from asm/8088/decompress_small.S
rename to asm/8088/decompress_small_v1.S
index 813e4fd..e691dde 100755
--- a/asm/8088/decompress_small.S
+++ b/asm/8088/decompress_small_v1.S
@@ -22,15 +22,15 @@
bits 16
; ---------------------------------------------------------------------------
-; Decompress raw LZSA block
+; Decompress raw LZSA1 block
; inputs:
-; * ds:si: raw LZSA block
+; * ds:si: raw LZSA1 block
; * es:di: output buffer
; output:
; * ax: decompressed size
; ---------------------------------------------------------------------------
-lzsa_decompress:
+lzsa1_decompress:
push di ; remember decompression offset
cld ; make string operations (lods, movs, stos..) move forward
diff --git a/asm/8088/decompress_small_v2.S b/asm/8088/decompress_small_v2.S
new file mode 100755
index 0000000..61e99f7
--- /dev/null
+++ b/asm/8088/decompress_small_v2.S
@@ -0,0 +1,174 @@
+; decompress_small_v2.S - space-efficient decompressor implementation for 8088
+;
+; Copyright (C) 2019 Emmanuel Marty
+;
+; This software is provided 'as-is', without any express or implied
+; warranty. In no event will the authors be held liable for any damages
+; arising from the use of this software.
+;
+; Permission is granted to anyone to use this software for any purpose,
+; including commercial applications, and to alter it and redistribute it
+; freely, subject to the following restrictions:
+;
+; 1. The origin of this software must not be misrepresented; you must not
+; claim that you wrote the original software. If you use this software
+; in a product, an acknowledgment in the product documentation would be
+; appreciated but is not required.
+; 2. Altered source versions must be plainly marked as such, and must not be
+; misrepresented as being the original software.
+; 3. This notice may not be removed or altered from any source distribution.
+
+ segment .text
+ bits 16
+
+; ---------------------------------------------------------------------------
+; Decompress raw LZSA2 block
+; inputs:
+; * ds:si: raw LZSA2 block
+; * es:di: output buffer
+; output:
+; * ax: decompressed size
+; ---------------------------------------------------------------------------
+
+lzsa2_decompress:
+ push di ; remember decompression offset
+ cld ; make string operations (lods, movs, stos..) move forward
+
+ xor cx,cx
+ xor bx,bx
+ xor bp,bp
+
+.decode_token:
+ mov ax,cx ; clear ah - cx is zero from above or from after rep movsb in .copy_match
+ lodsb ; read token byte: XYZ|LL|MMM
+ mov dx,ax ; keep token in dl
+
+ and al,018H ; isolate literals length in token (LL)
+ mov cl,3
+ shr al,cl ; shift literals length into place
+
+ cmp al,03H ; LITERALS_RUN_LEN_V2?
+ jne .got_literals ; no, we have the full literals count from the token, go copy
+
+ call .get_nibble ; get extra literals length nibble
+ add al,cl ; add len from token to nibble
+ cmp al,012H ; LITERALS_RUN_LEN_V2 + 15 ?
+ jne .got_literals ; if not, we have the full literals count, go copy
+
+ lodsb ; grab extra length byte
+ test al,al ; zero?
+ jne .got_literals ; if not, we have the full literals count, go copy
+
+ lodsw ; grab 16-bit extra length
+
+.got_literals:
+ xchg cx,ax
+ rep movsb ; copy cx literals from ds:si to es:di
+
+ test dl,dl ; check match offset mode in token (X bit)
+ js .rep_match_or_large_offset
+
+ cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit)
+ jnb .offset_9_bit
+
+ ; 5 bit offset
+ xchg ax,cx ; clear ah - cx is zero from the rep movsb above
+ mov al,020H ; shift Z (offset bit 4) in place
+ and al,dl
+ shr al,1
+ call .get_nibble ; get nibble for offset bits 0-3
+ or al,cl ; merge nibble
+ or al,0E0H ; set offset bits 7-5 to 1
+ dec ah ; set offset bits 15-8 to 1
+ jmp short .get_match_length
+
+.offset_9_bit: ; 9 bit offset
+ xchg ax,cx ; clear ah - cx is zero from the rep movsb above
+ lodsb ; get 8 bit offset from stream in A
+ dec ah ; set offset bits 15-8 to 1
+ test dl,020H ; test bit Z (offset bit 8)
+ jne .get_match_length
+ dec ah ; clear bit 8 if Z bit is clear
+ jmp short .get_match_length
+
+.rep_match_or_large_offset:
+ cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit)
+ jnb .rep_match_or_16_bit
+
+ ; 13 bit offset
+ lodsb ; load match offset bits 0-7
+
+ mov ah,020H ; shift Z (offset bit 12) in place
+ and ah,dl
+ shr ah,1
+ call .get_nibble ; get nibble for offset bits 8-11
+ or ah,cl ; merge nibble
+ or ah,0E0H ; set offset bits 15-13 to 1
+ sub ah,2 ; subtract 512
+ jmp short .get_match_length
+
+.rep_match_or_16_bit:
+ test dl,020H ; test bit Z (offset bit 8)
+ jne .repeat_match ; rep-match
+
+ ; 16 bit offset
+ lodsw ; Get 2-byte match offset
+
+.get_match_length:
+ mov bp,ax ; bp: offset
+.repeat_match:
+ mov ax,dx ; ax: original token
+ and al,07H ; isolate match length in token (MMM)
+ add al,2 ; add MIN_MATCH_SIZE_V2
+
+ cmp al,09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2?
+ jne .got_matchlen ; no, we have the full match length from the token, go copy
+
+ call .get_nibble ; get extra match length nibble
+ add al,cl ; add len from token to nibble
+ cmp al,018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
+ jne .got_matchlen ; no, we have the full match length from the token, go copy
+
+ lodsb ; grab extra length byte
+ test al,al ; zero?
+ jne .got_matchlen ; if not, we have the entire length
+
+ lodsw ; grab 16-bit length
+ test ax,ax ; bail if we hit EOD
+ je short .done_decompressing
+
+.got_matchlen:
+ xchg cx,ax ; copy match length into cx
+ push ds ; save ds:si (current pointer to compressed data)
+ xchg si,ax
+ push es
+ pop ds
+ mov si,di ; ds:si now points at back reference in output data
+ add si,bp
+ rep movsb ; copy match
+ xchg si,ax ; restore ds:si
+ pop ds
+ jmp .decode_token ; go decode another token
+
+.done_decompressing:
+ pop ax ; retrieve the original decompression offset
+ xchg ax,di ; compute decompressed size
+ sub ax,di
+ ret ; done
+
+.get_nibble:
+ dec bh ; nibble ready?
+ jns .has_nibble
+
+ mov cx,ax
+ lodsb ; load two nibbles
+ mov bl,al
+ mov bh,1
+ mov ax,cx
+
+.has_nibble:
+ mov cl,4 ; swap 4 high and low bits of nibble
+ ror bl,cl
+ mov cl,0FH
+ and cl,bl
+ ret
diff --git a/src/expand.c b/src/expand_v1.c
old mode 100755
new mode 100644
similarity index 72%
rename from src/expand.c
rename to src/expand_v1.c
index eed5326..c19fb08
--- a/src/expand.c
+++ b/src/expand_v1.c
@@ -1,5 +1,5 @@
/*
- * expand.c - block decompressor implementation
+ * expand_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@@ -20,11 +20,21 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
#include
#include
#include
#include "format.h"
-#include "expand.h"
+#include "expand_v1.h"
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
@@ -32,11 +42,11 @@
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
-static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
+static inline FORCE_INLINE int lzsa_expand_literals_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
- if (nLiterals == LITERALS_RUN_LEN) {
+ if (nLiterals == LITERALS_RUN_LEN_V1) {
unsigned char nByte;
if (pInBlock < pInBlockEnd) {
@@ -83,12 +93,12 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
return 0;
}
-static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
+static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
const unsigned char *pInBlock = *ppInBlock;
unsigned char *pCurOutData = *ppCurOutData;
- nMatchLen += MIN_MATCH_SIZE;
- if (nMatchLen == (MATCH_RUN_LEN + MIN_MATCH_SIZE)) {
+ nMatchLen += MIN_MATCH_SIZE_V1;
+ if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
unsigned char nByte;
if (pInBlock < pInBlockEnd) {
@@ -159,7 +169,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
}
/**
- * Decompress one data block
+ * Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
@@ -169,7 +179,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
*
* @return size of decompressed data in bytes, or -1 for error
*/
-int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
@@ -182,36 +192,35 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
- if (nLiterals < LITERALS_RUN_LEN) {
+ if (nLiterals < LITERALS_RUN_LEN_V1) {
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
- if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
+ if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1;
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset;
- nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff));
+ nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
if (token & 0x80) {
- nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8);
+ nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
}
- nMatchOffset++;
- const unsigned char *pSrc = pCurOutData - nMatchOffset;
+ const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
- if (nMatchLen < MATCH_RUN_LEN && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+ if (nMatchLen < MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 4);
- pCurOutData += (MIN_MATCH_SIZE + nMatchLen);
+ pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
}
else {
- if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
+ if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
}
@@ -227,22 +236,21 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
const unsigned char token = *pInBlock++;
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
- if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
+ if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
return -1;
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
int nMatchOffset;
- nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff));
+ nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00;
if (token & 0x80) {
- nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8);
+ nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8);
}
- nMatchOffset++;
- const unsigned char *pSrc = pCurOutData - nMatchOffset;
+ const unsigned char *pSrc = pCurOutData + nMatchOffset;
if (pSrc >= pOutData) {
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
- if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
+ if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
return -1;
}
else {
diff --git a/src/expand.h b/src/expand_v1.h
old mode 100755
new mode 100644
similarity index 65%
rename from src/expand.h
rename to src/expand_v1.h
index 06cbcf9..d6f44b9
--- a/src/expand.h
+++ b/src/expand_v1.h
@@ -1,5 +1,5 @@
/*
- * expand.h - block decompressor definitions
+ * expand_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@@ -20,11 +20,21 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
-#ifndef _EXPAND_H
-#define _EXPAND_H
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_V1_H
+#define _EXPAND_V1_H
/**
- * Decompress one data block
+ * Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
@@ -34,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
-int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
+int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
-#endif /* _EXPAND_H */
+#endif /* _EXPAND_V1_H */
diff --git a/src/expand_v2.c b/src/expand_v2.c
new file mode 100644
index 0000000..2c993e5
--- /dev/null
+++ b/src/expand_v2.c
@@ -0,0 +1,330 @@
+/*
+ * expand_v2.c - LZSA2 block decompressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include
+#include
+#include
+#include "format.h"
+#include "expand_v2.h"
+
+#ifdef _MSC_VER
+#define FORCE_INLINE __forceinline
+#else /* _MSC_VER */
+#define FORCE_INLINE __attribute__((always_inline))
+#endif /* _MSC_VER */
+
+static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles) {
+ unsigned int nValue;
+
+ if ((*nCurNibbles ^= 1) != 0) {
+ const unsigned char *pInBlock = *ppInBlock;
+ if (pInBlock >= pInBlockEnd) return -1;
+ (*nibbles) = *pInBlock++;
+ *ppInBlock = pInBlock;
+ }
+
+ nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
+
+ (*nibbles) <<= 4;
+
+ return nValue;
+}
+
+static inline FORCE_INLINE int lzsa_expand_literals_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, int *nCurNibbles, unsigned char *nibbles,
+ unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) {
+ const unsigned char *pInBlock = *ppInBlock;
+ unsigned char *pCurOutData = *ppCurOutData;
+
+ if (nLiterals == LITERALS_RUN_LEN_V2) {
+ nLiterals += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
+
+ if (nLiterals == (LITERALS_RUN_LEN_V2 + 15)) {
+ if (pInBlock < pInBlockEnd) {
+ nLiterals = ((unsigned int)*pInBlock++);
+
+ if (nLiterals == 0) {
+ if ((pInBlock + 1) < pInBlockEnd) {
+ nLiterals = ((unsigned int)*pInBlock++);
+ nLiterals |= (((unsigned int)*pInBlock++) << 8);
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+
+ if (nLiterals != 0) {
+ if ((pInBlock + nLiterals) <= pInBlockEnd &&
+ (pCurOutData + nLiterals) <= pOutDataEnd) {
+ memcpy(pCurOutData, pInBlock, nLiterals);
+ pInBlock += nLiterals;
+ pCurOutData += nLiterals;
+ }
+ else {
+ return -1;
+ }
+ }
+
+ *ppInBlock = pInBlock;
+ *ppCurOutData = pCurOutData;
+ return 0;
+}
+
+static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, int *nCurNibbles, unsigned char *nibbles,
+ unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) {
+ const unsigned char *pInBlock = *ppInBlock;
+ unsigned char *pCurOutData = *ppCurOutData;
+
+ nMatchLen += MIN_MATCH_SIZE_V2;
+ if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
+ nMatchLen += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles);
+
+ if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 + 15)) {
+ if (pInBlock < pInBlockEnd) {
+ nMatchLen = ((unsigned int)*pInBlock++);
+
+ if (nMatchLen == 0) {
+ if ((pInBlock + 1) < pInBlockEnd) {
+ nMatchLen = ((unsigned int)*pInBlock++);
+ nMatchLen |= (((unsigned int)*pInBlock++) << 8);
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+
+ if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
+ /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
+
+ if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
+ const unsigned char *pCopySrc = pSrc;
+ unsigned char *pCopyDst = pCurOutData;
+ const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
+
+ do {
+ memcpy(pCopyDst, pCopySrc, 8);
+ memcpy(pCopyDst + 8, pCopySrc + 8, 8);
+ pCopySrc += 16;
+ pCopyDst += 16;
+ } while (pCopyDst < pCopyEndDst);
+
+ pCurOutData += nMatchLen;
+ }
+ else {
+ while (nMatchLen >= 4) {
+ *pCurOutData++ = *pSrc++;
+ *pCurOutData++ = *pSrc++;
+ *pCurOutData++ = *pSrc++;
+ *pCurOutData++ = *pSrc++;
+ nMatchLen -= 4;
+ }
+ while (nMatchLen) {
+ *pCurOutData++ = *pSrc++;
+ nMatchLen--;
+ }
+ }
+ }
+ else {
+ return -1;
+ }
+
+ *ppInBlock = pInBlock;
+ *ppCurOutData = pCurOutData;
+ return 0;
+}
+
+/**
+ * Decompress one LZSA2 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nInBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+ const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
+ const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
+ unsigned char *pCurOutData = pOutData + nOutDataOffset;
+ const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
+ const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
+ int nCurNibbles = 0;
+ unsigned char nibbles;
+ int nMatchOffset = 0;
+
+ /* Fast loop */
+
+ while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
+ const unsigned char token = *pInBlock++;
+ unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
+
+ if (nLiterals < LITERALS_RUN_LEN_V2) {
+ memcpy(pCurOutData, pInBlock, 8);
+ pInBlock += nLiterals;
+ pCurOutData += nLiterals;
+ }
+ else {
+ if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
+ return -1;
+ }
+
+ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
+ unsigned char nOffsetMode = token & 0xc0;
+
+ switch (nOffsetMode) {
+ case 0x00:
+ /* 5 bit offset */
+ nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
+ nMatchOffset |= ((token & 0x20) >> 1);
+ nMatchOffset |= 0xffffffe0;
+ break;
+
+ case 0x40:
+ /* 9 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
+ nMatchOffset |= 0xfffffe00;
+ break;
+
+ case 0x80:
+ /* 13 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
+ nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
+ nMatchOffset |= 0xffffe000;
+ nMatchOffset -= 512;
+ break;
+
+ default:
+ /* Check if this is a 16 bit offset or a rep-match */
+ if ((token & 0x20) == 0) {
+ /* 16 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
+ nMatchOffset |= 0xffff0000;
+ }
+ break;
+ }
+
+ const unsigned char *pSrc = pCurOutData + nMatchOffset;
+ if (pSrc >= pOutData) {
+ unsigned int nMatchLen = (unsigned int)(token & 0x07);
+ if (nMatchLen < MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
+ memcpy(pCurOutData, pSrc, 8);
+ memcpy(pCurOutData + 8, pSrc + 8, 4);
+ pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
+ }
+ else {
+ if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
+ return -1;
+ }
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+
+ /* Slow loop for the remainder of the buffer */
+
+ while (pInBlock < pInBlockEnd) {
+ const unsigned char token = *pInBlock++;
+ unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
+
+ if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd))
+ return -1;
+
+ if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
+ unsigned char nOffsetMode = token & 0xc0;
+
+ switch (nOffsetMode) {
+ case 0x00:
+ /* 5 bit offset */
+ nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles);
+ nMatchOffset |= ((token & 0x20) >> 1);
+ nMatchOffset |= 0xffffffe0;
+ break;
+
+ case 0x40:
+ /* 9 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
+ nMatchOffset |= 0xfffffe00;
+ break;
+
+ case 0x80:
+ /* 13 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8);
+ nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
+ nMatchOffset |= 0xffffe000;
+ nMatchOffset -= 512;
+ break;
+
+ default:
+ /* Check if this is a 16 bit offset or a rep-match */
+ if ((token & 0x20) == 0) {
+ /* 16 bit offset */
+ nMatchOffset = (unsigned int)(*pInBlock++);
+ nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8);
+ nMatchOffset |= 0xffff0000;
+ }
+ break;
+ }
+
+ const unsigned char *pSrc = pCurOutData + nMatchOffset;
+ if (pSrc >= pOutData) {
+ unsigned int nMatchLen = (unsigned int)(token & 0x07);
+ if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
+ return -1;
+ }
+ else {
+ return -1;
+ }
+ }
+ }
+
+ return (int)(pCurOutData - (pOutData + nOutDataOffset));
+}
diff --git a/src/expand_v2.h b/src/expand_v2.h
new file mode 100644
index 0000000..906965c
--- /dev/null
+++ b/src/expand_v2.h
@@ -0,0 +1,49 @@
+/*
+ * expand_v2.h - LZSA2 block decompressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _EXPAND_V2_H
+#define _EXPAND_V2_H
+
+/**
+ * Decompress one LZSA2 data block
+ *
+ * @param pInBlock pointer to compressed data
+ * @param nBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
+
+#endif /* _EXPAND_V2_H */
diff --git a/src/format.h b/src/format.h
index 899b7b4..c9534fa 100755
--- a/src/format.h
+++ b/src/format.h
@@ -20,13 +20,28 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
#ifndef _FORMAT_H
#define _FORMAT_H
-#define MIN_MATCH_SIZE 3
#define MIN_OFFSET 1
#define MAX_OFFSET 0xffff
-#define LITERALS_RUN_LEN 7
-#define MATCH_RUN_LEN 15
+
+#define MIN_MATCH_SIZE_V1 3
+#define LITERALS_RUN_LEN_V1 7
+#define MATCH_RUN_LEN_V1 15
+
+#define MIN_MATCH_SIZE_V2 2
+#define LITERALS_RUN_LEN_V2 3
+#define MATCH_RUN_LEN_V2 7
#endif /* _FORMAT_H */
diff --git a/src/frame.c b/src/frame.c
index ba99904..edbe531 100644
--- a/src/frame.c
+++ b/src/frame.c
@@ -20,9 +20,18 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
#include
#include "frame.h"
-#include "shrink.h"
#define LZSA_ID_0 0x7b
#define LZSA_ID_1 0x9e
@@ -53,11 +62,11 @@ int lzsa_get_frame_size(void) {
*
* @return number of encoded bytes, or -1 for failure
*/
-int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize) {
- if (nMaxFrameDataSize >= 3) {
+int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
+ if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1;
- pFrameData[2] = 0; /* Format version 1 */
+ pFrameData[2] = (nFormatVersion == 2) ? 0x20 : 0; /* Format version: 0x00 = v1, 0x20 = v2 */
return 3;
}
@@ -139,14 +148,16 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @return 0 for success, or -1 for failure
*/
-int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize) {
+int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) {
if (nFrameDataSize != 3 ||
pFrameData[0] != LZSA_ID_0 ||
pFrameData[1] != LZSA_ID_1 ||
- pFrameData[2] != 0) {
+ (pFrameData[2] & 0x1f) != 0 ||
+ ((pFrameData[2] & 0xe0) != 0x00 && (pFrameData[2] & 0xe0) != 0x20)) {
return -1;
}
else {
+ *nFormatVersion = (pFrameData[2] & 0xe0) ? 2 : 1;
return 0;
}
}
diff --git a/src/frame.h b/src/frame.h
index e858110..520b065 100644
--- a/src/frame.h
+++ b/src/frame.h
@@ -20,6 +20,16 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
#ifndef _FRAME_H
#define _FRAME_H
@@ -45,7 +55,7 @@ int lzsa_get_frame_size(void);
*
* @return number of encoded bytes, or -1 for failure
*/
-int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize);
+int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
/**
* Encode compressed block frame header
@@ -87,7 +97,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS
*
* @return 0 for success, or -1 for failure
*/
-int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize);
+int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion);
/**
* Decode frame header
diff --git a/src/lib.c b/src/lib.c
new file mode 100755
index 0000000..5a516b2
--- /dev/null
+++ b/src/lib.c
@@ -0,0 +1,217 @@
+/*
+ * lib.c - LZSA library implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "lib.h"
+#include "matchfinder.h"
+#include "shrink_v1.h"
+#include "shrink_v2.h"
+#include "expand_v1.h"
+#include "expand_v2.h"
+#include "format.h"
+
+/**
+ * Initialize compression context
+ *
+ * @param pCompressor compression context to initialize
+ * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
+ * @param nMinMatchSize minimum match size (clamped between the format version's minimum and 5)
+ * @param nFormatVersion LZSA format version to generate (1 or 2)
+ * @param nFlags compression flags
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
+ int nResult;
+ int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
+
+ nResult = divsufsort_init(&pCompressor->divsufsort_context);
+ pCompressor->intervals = NULL;
+ pCompressor->pos_data = NULL;
+ pCompressor->open_intervals = NULL;
+ pCompressor->match = NULL;
+ pCompressor->best_match = NULL;
+ pCompressor->slot_cost = NULL;
+ pCompressor->repmatch_opt = NULL;
+ pCompressor->min_match_size = nMinMatchSize;
+ if (pCompressor->min_match_size < nMinMatchSizeForFormat)
+ pCompressor->min_match_size = nMinMatchSizeForFormat;
+ else if (pCompressor->min_match_size > 5)
+ pCompressor->min_match_size = 5;
+ pCompressor->format_version = nFormatVersion;
+ pCompressor->flags = nFlags;
+ pCompressor->num_commands = 0;
+
+ if (!nResult) {
+ pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
+
+ if (pCompressor->intervals) {
+ pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
+
+ if (pCompressor->pos_data) {
+ pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
+
+ if (pCompressor->open_intervals) {
+ pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
+
+ if (pCompressor->match) {
+ if (pCompressor->format_version == 2) {
+ pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
+
+ if (pCompressor->best_match) {
+ pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
+
+ if (pCompressor->slot_cost) {
+ pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
+
+ if (pCompressor->repmatch_opt)
+ return 0;
+ }
+ }
+ }
+ else {
+ return 0;
+ }
+ }
+ }
+ }
+ }
+ }
+
+ lzsa_compressor_destroy(pCompressor);
+ return 100;
+}
+
+/**
+ * Clean up compression context and free up any associated resources
+ *
+ * @param pCompressor compression context to clean up
+ */
+void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
+ divsufsort_destroy(&pCompressor->divsufsort_context);
+
+ if (pCompressor->repmatch_opt) {
+ free(pCompressor->repmatch_opt);
+ pCompressor->repmatch_opt = NULL;
+ }
+
+ if (pCompressor->slot_cost) {
+ free(pCompressor->slot_cost);
+ pCompressor->slot_cost = NULL;
+ }
+
+ if (pCompressor->best_match) {
+ free(pCompressor->best_match);
+ pCompressor->best_match = NULL;
+ }
+
+ if (pCompressor->match) {
+ free(pCompressor->match);
+ pCompressor->match = NULL;
+ }
+
+ if (pCompressor->open_intervals) {
+ free(pCompressor->open_intervals);
+ pCompressor->open_intervals = NULL;
+ }
+
+ if (pCompressor->pos_data) {
+ free(pCompressor->pos_data);
+ pCompressor->pos_data = NULL;
+ }
+
+ if (pCompressor->intervals) {
+ free(pCompressor->intervals);
+ pCompressor->intervals = NULL;
+ }
+}
+
+/**
+ * Compress one block of data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+ if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
+ return -1;
+ if (nPreviousBlockSize) {
+ lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
+ }
+ lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+ if (pCompressor->format_version == 1) {
+ return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
+ }
+ else if (pCompressor->format_version == 2) {
+ return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
+ }
+ else {
+ return -1;
+ }
+}
+
+/**
+ * Get the number of compression commands issued in compressed data blocks
+ *
+ * @return number of commands
+ */
+int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
+ return pCompressor->num_commands;
+}
+
+/**
+ * Decompress one data block
+ *
+ * @param nFormatVersion LZSA format version of the block (1 or 2)
+ * @param pInBlock pointer to compressed data
+ * @param nBlockSize size of compressed data, in bytes
+ * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
+ * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
+ * @param nBlockMaxSize total size of output decompression buffer, in bytes
+ *
+ * @return size of decompressed data in bytes, or -1 for error
+ */
+int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
+ if (nFormatVersion == 1)
+ return lzsa_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
+ else if (nFormatVersion == 2)
+ return lzsa_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
+ else
+ return -1;
+}
diff --git a/src/shrink.h b/src/lib.h
similarity index 57%
rename from src/shrink.h
rename to src/lib.h
index fcf3592..331b7cb 100755
--- a/src/shrink.h
+++ b/src/lib.h
@@ -1,5 +1,5 @@
/*
- * shrink.h - block compressor definitions
+ * lib.h - LZSA library definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
@@ -20,8 +20,18 @@
* 3. This notice may not be removed or altered from any source distribution.
*/
-#ifndef _SHRINK_H
-#define _SHRINK_H
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _LIB_H
+#define _LIB_H
#include "divsufsort.h"
@@ -29,17 +39,46 @@
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
-/* Forward declarations */
-typedef struct _lzsa_match lzsa_match;
+#define LCP_BITS 15
+#define LCP_MAX (1<<(LCP_BITS - 1))
+#define LCP_SHIFT (32-LCP_BITS)
+#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -31,17 +41,18 @@
#endif
#include "format.h"
#include "frame.h"
-#include "shrink.h"
-#include "expand.h"
+#include "lib.h"
#define BLOCK_SIZE 65536
#define OPT_VERBOSE 1
#define OPT_RAW 2
#define OPT_FAVOR_RATIO 4
+#define TOOL_VERSION "0.6.0"
+
/*---------------------------------------------------------------------------*/
-static long long lzsa_get_time() {
+static long long do_get_time() {
long long nTime;
#ifdef _WIN32
@@ -60,7 +71,7 @@ static long long lzsa_get_time() {
/*---------------------------------------------------------------------------*/
-static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) {
+static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
FILE *f_in, *f_out;
unsigned char *pInData, *pOutData;
lsza_compressor compressor;
@@ -146,7 +157,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
- nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags);
+ nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
if (nResult != 0) {
free(pOutData);
pOutData = NULL;
@@ -165,7 +176,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
}
if ((nOptions & OPT_RAW) == 0) {
- int nHeaderSize = lzsa_encode_header(cFrameData, 16);
+ int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
if (nHeaderSize < 0)
bError = true;
else {
@@ -175,7 +186,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
}
if (nOptions & OPT_VERBOSE) {
- nStartTime = lzsa_get_time();
+ nStartTime = do_get_time();
}
int nPreviousBlockSize = 0;
@@ -280,7 +291,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
nCompressedSize += (long long)nFooterSize;
if (!bError && (nOptions & OPT_VERBOSE)) {
- nEndTime = lzsa_get_time();
+ nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
@@ -315,7 +326,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
/*---------------------------------------------------------------------------*/
-static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
unsigned int nFileSize = 0;
@@ -338,7 +349,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
return 100;
}
- if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) {
+ if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
@@ -423,7 +434,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
}
if (nOptions & OPT_VERBOSE) {
- nStartTime = lzsa_get_time();
+ nStartTime = do_get_time();
}
int nDecompressionError = 0;
@@ -476,7 +487,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
else {
unsigned int nBlockOffs = 0;
- nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
+ nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
@@ -518,7 +529,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
}
else {
if (nOptions & OPT_VERBOSE) {
- nEndTime = lzsa_get_time();
+ nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n",
@@ -529,7 +540,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
}
}
-static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
+static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
long long nKnownGoodSize = 0LL;
@@ -553,7 +564,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
return 100;
}
- if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) {
+ if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
@@ -659,7 +670,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
}
if (nOptions & OPT_VERBOSE) {
- nStartTime = lzsa_get_time();
+ nStartTime = do_get_time();
}
int nDecompressionError = 0;
@@ -715,7 +726,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
else {
unsigned int nBlockOffs = 0;
- nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
+ nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
@@ -771,7 +782,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
}
else {
if (nOptions & OPT_VERBOSE) {
- nEndTime = lzsa_get_time();
+ nEndTime = do_get_time();
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n",
@@ -793,9 +804,11 @@ int main(int argc, char **argv) {
bool bCommandDefined = false;
bool bVerifyCompression = false;
bool bMinMatchDefined = false;
+ bool bFormatVersionDefined = false;
char cCommand = 'z';
- int nMinMatchSize = MIN_MATCH_SIZE;
+ int nMinMatchSize = 0;
unsigned int nOptions = OPT_FAVOR_RATIO;
+ int nFormatVersion = 1;
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-d")) {
@@ -840,7 +853,7 @@ int main(int argc, char **argv) {
if (!bMinMatchDefined && (i + 1) < argc) {
char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10);
- if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
+ if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
i++;
bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO);
@@ -856,7 +869,7 @@ int main(int argc, char **argv) {
if (!bMinMatchDefined) {
char *pEnd = NULL;
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
- if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
+ if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) {
bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO);
}
@@ -869,7 +882,7 @@ int main(int argc, char **argv) {
}
else if (!strcmp(argv[i], "--prefer-ratio")) {
if (!bMinMatchDefined) {
- nMinMatchSize = MIN_MATCH_SIZE;
+ nMinMatchSize = 0;
bMinMatchDefined = true;
}
else
@@ -884,6 +897,35 @@ int main(int argc, char **argv) {
else
bArgsError = true;
}
+ else if (!strcmp(argv[i], "-f")) {
+ if (!bFormatVersionDefined && (i + 1) < argc) {
+ char *pEnd = NULL;
+ nFormatVersion = (int)strtol(argv[i + 1], &pEnd, 10);
+ if (pEnd && pEnd != argv[i + 1] && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
+ i++;
+ bFormatVersionDefined = true;
+ }
+ else {
+ bArgsError = true;
+ }
+ }
+ else
+ bArgsError = true;
+ }
+ else if (!strncmp(argv[i], "-f", 2)) {
+ if (!bFormatVersionDefined) {
+ char *pEnd = NULL;
+ nFormatVersion = (int)strtol(argv[i] + 2, &pEnd, 10);
+ if (pEnd && pEnd != (argv[i] + 2) && (nFormatVersion >= 1 && nFormatVersion <= 2)) {
+ bFormatVersionDefined = true;
+ }
+ else {
+ bArgsError = true;
+ }
+ }
+ else
+ bArgsError = true;
+ }
else if (!strcmp(argv[i], "-v")) {
if ((nOptions & OPT_VERBOSE) == 0) {
nOptions |= OPT_VERBOSE;
@@ -911,26 +953,28 @@ int main(int argc, char **argv) {
}
if (bArgsError || !pszInFilename || !pszOutFilename) {
+ fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
fprintf(stderr, " -c: check resulting stream after compressing\n");
fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -v: be verbose\n");
+ fprintf(stderr, " -f <value>: LZSA compression format (1-2)\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
fprintf(stderr, " -D <filename>: use dictionary file\n");
- fprintf(stderr, " -m : minimum match size (3-14) (default: 3)\n");
+ fprintf(stderr, " -m <value>: minimum match size (2-5) (default: 3)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
return 100;
}
if (cCommand == 'z') {
- int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize);
+ int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
if (nResult == 0 && bVerifyCompression) {
- nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions);
+ nResult = do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion);
}
}
else if (cCommand == 'd') {
- return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
+ return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
}
else {
return 100;
diff --git a/src/matchfinder.c b/src/matchfinder.c
new file mode 100644
index 0000000..c421eb6
--- /dev/null
+++ b/src/matchfinder.c
@@ -0,0 +1,294 @@
+/*
+ * matchfinder.c - LZ match finder implementation
+ *
+ * The following copying information applies to this specific source code file:
+ *
+ * Written in 2019 by Emmanuel Marty
+ * Portions written in 2014-2015 by Eric Biggers
+ *
+ * To the extent possible under law, the author(s) have dedicated all copyright
+ * and related and neighboring rights to this software to the public domain
+ * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
+ * Dedication (the "CC0").
+ *
+ * This software is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
+ *
+ * You should have received a copy of the CC0 along with this software; if not
+ * see .
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "matchfinder.h"
+#include "format.h"
+#include "lib.h"
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
+ unsigned int *intervals = pCompressor->intervals;
+
+ /* Build suffix array from input data */
+ if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
+ return 100;
+ }
+
+ int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
+ int *Phi = PLCP;
+ int nCurLen = 0;
+ int i;
+
+ /* Compute the permuted LCP first (Kärkkäinen method) */
+ Phi[intervals[0]] = -1;
+ for (i = 1; i < nInWindowSize; i++)
+ Phi[intervals[i]] = intervals[i - 1];
+ for (i = 0; i < nInWindowSize; i++) {
+ if (Phi[i] == -1) {
+ PLCP[i] = 0;
+ continue;
+ }
+ int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
+ while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
+ PLCP[i] = nCurLen;
+ if (nCurLen > 0)
+ nCurLen--;
+ }
+
+ /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
+ * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
+ * and the interval builder below doesn't need it either. */
+ intervals[0] &= POS_MASK;
+ int nMinMatchSize = pCompressor->min_match_size;
+ for (i = 1; i < nInWindowSize - 1; i++) {
+ int nIndex = (int)(intervals[i] & POS_MASK);
+ int nLen = PLCP[nIndex];
+ if (nLen < nMinMatchSize)
+ nLen = 0;
+ if (nLen > LCP_MAX)
+ nLen = LCP_MAX;
+ intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
+ }
+ if (i < nInWindowSize)
+ intervals[i] &= POS_MASK;
+
+ /**
+ * Build intervals for finding matches
+ *
+ * Methodology and code fragment taken from wimlib (CC0 license):
+ * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+ */
+ unsigned int * const SA_and_LCP = intervals;
+ unsigned int *pos_data = pCompressor->pos_data;
+ unsigned int next_interval_idx;
+ unsigned int *top = pCompressor->open_intervals;
+ unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
+
+ *top = 0;
+ intervals[0] = 0;
+ next_interval_idx = 1;
+
+ for (int r = 1; r < nInWindowSize; r++) {
+ const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
+ const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
+ const unsigned int top_lcp = *top & LCP_MASK;
+
+ if (next_lcp == top_lcp) {
+ /* Continuing the deepest open interval */
+ pos_data[prev_pos] = *top;
+ }
+ else if (next_lcp > top_lcp) {
+ /* Opening a new interval */
+ *++top = next_lcp | next_interval_idx++;
+ pos_data[prev_pos] = *top;
+ }
+ else {
+ /* Closing the deepest open interval */
+ pos_data[prev_pos] = *top;
+ for (;;) {
+ const unsigned int closed_interval_idx = *top-- & POS_MASK;
+ const unsigned int superinterval_lcp = *top & LCP_MASK;
+
+ if (next_lcp == superinterval_lcp) {
+ /* Continuing the superinterval */
+ intervals[closed_interval_idx] = *top;
+ break;
+ }
+ else if (next_lcp > superinterval_lcp) {
+ /* Creating a new interval that is a
+ * superinterval of the one being
+ * closed, but still a subinterval of
+ * its superinterval */
+ *++top = next_lcp | next_interval_idx++;
+ intervals[closed_interval_idx] = *top;
+ break;
+ }
+ else {
+ /* Also closing the superinterval */
+ intervals[closed_interval_idx] = *top;
+ }
+ }
+ }
+ prev_pos = next_pos;
+ }
+
+ /* Close any still-open intervals. */
+ pos_data[prev_pos] = *top;
+ for (; top > pCompressor->open_intervals; top--)
+ intervals[*top & POS_MASK] = *(top - 1);
+
+ /* Success */
+ return 0;
+}
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ *
+ * @return number of matches
+ */
+int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
+ unsigned int *intervals = pCompressor->intervals;
+ unsigned int *pos_data = pCompressor->pos_data;
+ unsigned int ref;
+ unsigned int super_ref;
+ unsigned int match_pos;
+ lzsa_match *matchptr;
+
+ /**
+ * Find matches using intervals
+ *
+ * Taken from wimlib (CC0 license):
+ * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
+ */
+
+ /* Get the deepest lcp-interval containing the current suffix. */
+ ref = pos_data[nOffset];
+
+ pos_data[nOffset] = 0;
+
+ /* Ascend until we reach a visited interval, the root, or a child of the
+ * root. Link unvisited intervals to the current suffix as we go. */
+ while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
+ intervals[ref & POS_MASK] = nOffset;
+ ref = super_ref;
+ }
+
+ if (super_ref == 0) {
+ /* In this case, the current interval may be any of:
+ * (1) the root;
+ * (2) an unvisited child of the root;
+ * (3) an interval last visited by suffix 0
+ *
+ * We could avoid the ambiguity with (3) by using an lcp
+ * placeholder value other than 0 to represent "visited", but
+ * it's fastest to use 0. So we just don't allow matches with
+ * position 0. */
+
+ if (ref != 0) /* Not the root? */
+ intervals[ref & POS_MASK] = nOffset;
+ return 0;
+ }
+
+ /* Ascend indirectly via pos_data[] links. */
+ match_pos = super_ref;
+ matchptr = pMatches;
+ for (;;) {
+ while ((super_ref = pos_data[match_pos]) > ref)
+ match_pos = intervals[super_ref & POS_MASK];
+ intervals[ref & POS_MASK] = nOffset;
+ pos_data[match_pos] = ref;
+
+ if ((matchptr - pMatches) < nMaxMatches) {
+ int nMatchOffset = (int)(nOffset - match_pos);
+
+ if (nMatchOffset <= MAX_OFFSET) {
+ matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
+ matchptr->offset = (unsigned short)nMatchOffset;
+ matchptr++;
+ }
+ }
+
+ if (super_ref == 0)
+ break;
+ ref = super_ref;
+ match_pos = intervals[ref & POS_MASK];
+ }
+
+ return (int)(matchptr - pMatches);
+}
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ lzsa_match match;
+ int i;
+
+ /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
+ * we don't store the matches. */
+ for (i = nStartOffset; i < nEndOffset; i++) {
+ lzsa_find_matches_at(pCompressor, i, &match, 0);
+ }
+}
+
+/**
+ * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
+ * the optimizer to look at.
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
+ */
+void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
+ int i;
+
+ for (i = nStartOffset; i < nEndOffset; i++) {
+ int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
+ int m;
+
+ for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
+ if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
+ pMatch->length = 0;
+ pMatch->offset = 0;
+ }
+ else {
+ int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
+ if (nMaxLen < 0)
+ nMaxLen = 0;
+ if (pMatch->length > nMaxLen)
+ pMatch->length = (unsigned short)nMaxLen;
+ }
+
+ pMatch++;
+ }
+ }
+}
diff --git a/src/matchfinder.h b/src/matchfinder.h
new file mode 100644
index 0000000..27bcc34
--- /dev/null
+++ b/src/matchfinder.h
@@ -0,0 +1,82 @@
+/*
+ * matchfinder.h - LZ match finder definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _MATCHFINDER_H
+#define _MATCHFINDER_H
+
+/* Forward declarations */
+typedef struct _lzsa_match lzsa_match;
+typedef struct _lsza_compressor lsza_compressor;
+
+/**
+ * Parse input data, build suffix array and overlaid data structures to speed up match finding
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
+ *
+ * @return 0 for success, non-zero for failure
+ */
+int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
+
+/**
+ * Find matches at the specified offset in the input window
+ *
+ * @param pCompressor compression context
+ * @param nOffset offset to find matches at, in the input window
+ * @param pMatches pointer to returned matches
+ * @param nMaxMatches maximum number of matches to return (0 for none)
+ *
+ * @return number of matches
+ */
+int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
+
+/**
+ * Skip previously compressed bytes
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically 0)
+ * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
+ */
+void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+
+/**
+ * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
+ * the optimizer to look at.
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ */
+void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
+
+#endif /* _MATCHFINDER_H */
diff --git a/src/shrink.c b/src/shrink.c
deleted file mode 100755
index cd6fa9c..0000000
--- a/src/shrink.c
+++ /dev/null
@@ -1,830 +0,0 @@
-/*
- * shrink.c - block compressor implementation
- *
- * The following copying information applies to this specific source code file:
- *
- * Written in 2019 by Emmanuel Marty
- * With help, ideas, optimizations and speed measurements by spke
- * Portions written in 2014-2015 by Eric Biggers
- *
- * To the extent possible under law, the author(s) have dedicated all copyright
- * and related and neighboring rights to this software to the public domain
- * worldwide via the Creative Commons Zero 1.0 Universal Public Domain
- * Dedication (the "CC0").
- *
- * This software is distributed in the hope that it will be useful, but WITHOUT
- * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- * FOR A PARTICULAR PURPOSE. See the CC0 for more details.
- *
- * You should have received a copy of the CC0 along with this software; if not
- * see .
- */
-
-/*
- * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
- *
- * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
- * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
- * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
- *
- */
-
-#include
-#include
-#include
-#include "shrink.h"
-#include "format.h"
-
-#define LCP_BITS 15
-#define LCP_MAX (1<<(LCP_BITS - 1))
-#define LCP_SHIFT (32-LCP_BITS)
-#define LCP_MASK (((1<divsufsort_context);
- pCompressor->intervals = NULL;
- pCompressor->pos_data = NULL;
- pCompressor->open_intervals = NULL;
- pCompressor->match = NULL;
- pCompressor->min_match_size = nMinMatchSize;
- if (pCompressor->min_match_size < MIN_MATCH_SIZE)
- pCompressor->min_match_size = MIN_MATCH_SIZE;
- else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
- pCompressor->min_match_size = MATCH_RUN_LEN - 1;
- pCompressor->flags = nFlags;
- pCompressor->num_commands = 0;
-
- if (!nResult) {
- pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
-
- if (pCompressor->intervals) {
- pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
-
- if (pCompressor->pos_data) {
- pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
-
- if (pCompressor->open_intervals) {
- pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
-
- if (pCompressor->match)
- return 0;
- }
- }
- }
- }
-
- lzsa_compressor_destroy(pCompressor);
- return 100;
-}
-
-/**
- * Clean up compression context and free up any associated resources
- *
- * @param pCompressor compression context to clean up
- */
-void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
- divsufsort_destroy(&pCompressor->divsufsort_context);
-
- if (pCompressor->match) {
- free(pCompressor->match);
- pCompressor->match = NULL;
- }
-
- if (pCompressor->open_intervals) {
- free(pCompressor->open_intervals);
- pCompressor->open_intervals = NULL;
- }
-
- if (pCompressor->pos_data) {
- free(pCompressor->pos_data);
- pCompressor->pos_data = NULL;
- }
-
- if (pCompressor->intervals) {
- free(pCompressor->intervals);
- pCompressor->intervals = NULL;
- }
-}
-
-/**
- * Parse input data, build suffix array and overlaid data structures to speed up match finding
- *
- * @param pCompressor compression context
- * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
- * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
- *
- * @return 0 for success, non-zero for failure
- */
-static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
- unsigned int *intervals = pCompressor->intervals;
-
- /* Build suffix array from input data */
- if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
- return 100;
- }
-
- int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */
- int *Phi = PLCP;
- int nCurLen = 0;
- int i;
-
- /* Compute the permuted LCP first (Kärkkäinen method) */
- Phi[intervals[0]] = -1;
- for (i = 1; i < nInWindowSize; i++)
- Phi[intervals[i]] = intervals[i - 1];
- for (i = 0; i < nInWindowSize; i++) {
- if (Phi[i] == -1) {
- PLCP[i] = 0;
- continue;
- }
- int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]);
- while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++;
- PLCP[i] = nCurLen;
- if (nCurLen > 0)
- nCurLen--;
- }
-
- /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also
- * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
- * and the interval builder below doesn't need it either. */
- intervals[0] &= POS_MASK;
- int nMinMatchSize = pCompressor->min_match_size;
- for (i = 1; i < nInWindowSize - 1; i++) {
- int nIndex = (int)(intervals[i] & POS_MASK);
- int nLen = PLCP[nIndex];
- if (nLen < nMinMatchSize)
- nLen = 0;
- if (nLen > LCP_MAX)
- nLen = LCP_MAX;
- intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT);
- }
- if (i < nInWindowSize)
- intervals[i] &= POS_MASK;
-
- /**
- * Build intervals for finding matches
- *
- * Methodology and code fragment taken from wimlib (CC0 license):
- * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
- */
- unsigned int * const SA_and_LCP = intervals;
- unsigned int *pos_data = pCompressor->pos_data;
- unsigned int next_interval_idx;
- unsigned int *top = pCompressor->open_intervals;
- unsigned int prev_pos = SA_and_LCP[0] & POS_MASK;
-
- *top = 0;
- intervals[0] = 0;
- next_interval_idx = 1;
-
- for (int r = 1; r < nInWindowSize; r++) {
- const unsigned int next_pos = SA_and_LCP[r] & POS_MASK;
- const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK;
- const unsigned int top_lcp = *top & LCP_MASK;
-
- if (next_lcp == top_lcp) {
- /* Continuing the deepest open interval */
- pos_data[prev_pos] = *top;
- }
- else if (next_lcp > top_lcp) {
- /* Opening a new interval */
- *++top = next_lcp | next_interval_idx++;
- pos_data[prev_pos] = *top;
- }
- else {
- /* Closing the deepest open interval */
- pos_data[prev_pos] = *top;
- for (;;) {
- const unsigned int closed_interval_idx = *top-- & POS_MASK;
- const unsigned int superinterval_lcp = *top & LCP_MASK;
-
- if (next_lcp == superinterval_lcp) {
- /* Continuing the superinterval */
- intervals[closed_interval_idx] = *top;
- break;
- }
- else if (next_lcp > superinterval_lcp) {
- /* Creating a new interval that is a
- * superinterval of the one being
- * closed, but still a subinterval of
- * its superinterval */
- *++top = next_lcp | next_interval_idx++;
- intervals[closed_interval_idx] = *top;
- break;
- }
- else {
- /* Also closing the superinterval */
- intervals[closed_interval_idx] = *top;
- }
- }
- }
- prev_pos = next_pos;
- }
-
- /* Close any still-open intervals. */
- pos_data[prev_pos] = *top;
- for (; top > pCompressor->open_intervals; top--)
- intervals[*top & POS_MASK] = *(top - 1);
-
- /* Success */
- return 0;
-}
-
-/**
- * Find matches at the specified offset in the input window
- *
- * @param pCompressor compression context
- * @param nOffset offset to find matches at, in the input window
- * @param pMatches pointer to returned matches
- * @param nMaxMatches maximum number of matches to return (0 for none)
- *
- * @return number of matches
- */
-static int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
- unsigned int *intervals = pCompressor->intervals;
- unsigned int *pos_data = pCompressor->pos_data;
- unsigned int ref;
- unsigned int super_ref;
- unsigned int match_pos;
- lzsa_match *matchptr;
-
- /**
- * Find matches using intervals
- *
- * Taken from wimlib (CC0 license):
- * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD
- */
-
- /* Get the deepest lcp-interval containing the current suffix. */
- ref = pos_data[nOffset];
-
- pos_data[nOffset] = 0;
-
- /* Ascend until we reach a visited interval, the root, or a child of the
- * root. Link unvisited intervals to the current suffix as we go. */
- while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
- intervals[ref & POS_MASK] = nOffset;
- ref = super_ref;
- }
-
- if (super_ref == 0) {
- /* In this case, the current interval may be any of:
- * (1) the root;
- * (2) an unvisited child of the root;
- * (3) an interval last visited by suffix 0
- *
- * We could avoid the ambiguity with (3) by using an lcp
- * placeholder value other than 0 to represent "visited", but
- * it's fastest to use 0. So we just don't allow matches with
- * position 0. */
-
- if (ref != 0) /* Not the root? */
- intervals[ref & POS_MASK] = nOffset;
- return 0;
- }
-
- /* Ascend indirectly via pos_data[] links. */
- match_pos = super_ref;
- matchptr = pMatches;
- for (;;) {
- while ((super_ref = pos_data[match_pos]) > ref)
- match_pos = intervals[super_ref & POS_MASK];
- intervals[ref & POS_MASK] = nOffset;
- pos_data[match_pos] = ref;
-
- if ((matchptr - pMatches) < nMaxMatches) {
- int nMatchOffset = (int)(nOffset - match_pos);
-
- if (nMatchOffset <= MAX_OFFSET) {
- matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
- matchptr->offset = (unsigned short)nMatchOffset;
- matchptr++;
- }
- }
-
- if (super_ref == 0)
- break;
- ref = super_ref;
- match_pos = intervals[ref & POS_MASK];
- }
-
- return (int)(matchptr - pMatches);
-}
-
-/**
- * Skip previously compressed bytes
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically 0)
- * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
- */
-static void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
- lzsa_match match;
- int i;
-
- /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
- * we don't store the matches. */
- for (i = nStartOffset; i < nEndOffset; i++) {
- lzsa_find_matches_at(pCompressor, i, &match, 0);
- }
-}
-
-/**
- * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
- * the optimizer to look at.
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
- lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
- int i;
-
- for (i = nStartOffset; i < nEndOffset; i++) {
- int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET);
- int m;
-
- for (m = 0; m < NMATCHES_PER_OFFSET; m++) {
- if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) {
- pMatch->length = 0;
- pMatch->offset = 0;
- }
- else {
- int nMaxLen = (nEndOffset - LAST_LITERALS) - i;
- if (nMaxLen < 0)
- nMaxLen = 0;
- if (pMatch->length > nMaxLen)
- pMatch->length = (unsigned short)nMaxLen;
- }
-
- pMatch++;
- }
- }
-}
-
-/**
- * Get the number of extra bits required to represent a literals length
- *
- * @param nLength literals length
- *
- * @return number of extra bits required
- */
-static inline int lzsa_get_literals_varlen_size(const int nLength) {
- if (nLength < LITERALS_RUN_LEN) {
- return 0;
- }
- else {
- if (nLength < 256)
- return 8;
- else {
- if (nLength < 512)
- return 16;
- else
- return 24;
- }
- }
-}
-
-/**
- * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
- * room to write the bytes.
- *
- * @param pOutData pointer to output buffer
- * @param nOutOffset current write index into output buffer
- * @param nLength literals length
- */
-static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
- if (nLength >= LITERALS_RUN_LEN) {
- if (nLength < 256)
- pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
- else {
- if (nLength < 512) {
- pOutData[nOutOffset++] = 250;
- pOutData[nOutOffset++] = nLength - 256;
- }
- else {
- pOutData[nOutOffset++] = 249;
- pOutData[nOutOffset++] = nLength & 0xff;
- pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
- }
- }
- }
-
- return nOutOffset;
-}
-
-/**
- * Get the number of extra bits required to represent an encoded match length
- *
- * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
- *
- * @return number of extra bits required
- */
-static inline int lzsa_get_match_varlen_size(const int nLength) {
- if (nLength < MATCH_RUN_LEN) {
- return 0;
- }
- else {
- if ((nLength + MIN_MATCH_SIZE) < 256)
- return 8;
- else {
- if ((nLength + MIN_MATCH_SIZE) < 512)
- return 16;
- else
- return 24;
- }
- }
-}
-
-/**
- * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
- * room to write the bytes.
- *
- * @param pOutData pointer to output buffer
- * @param nOutOffset current write index into output buffer
- * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
- */
-static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
- if (nLength >= MATCH_RUN_LEN) {
- if ((nLength + MIN_MATCH_SIZE) < 256)
- pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
- else {
- if ((nLength + MIN_MATCH_SIZE) < 512) {
- pOutData[nOutOffset++] = 239;
- pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE - 256;
- }
- else {
- pOutData[nOutOffset++] = 238;
- pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE) & 0xff;
- pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE) >> 8) & 0xff;
- }
- }
- }
-
- return nOutOffset;
-}
-
-/**
- * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
- int *cost = (int*)pCompressor->pos_data; /* Reuse */
- int nLastLiteralsOffset;
- int nMinMatchSize = pCompressor->min_match_size;
- const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
- int i;
-
- cost[nEndOffset - 1] = 8;
- nLastLiteralsOffset = nEndOffset;
-
- for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
- int nBestCost, nBestMatchLen, nBestMatchOffset;
-
- int nLiteralsLen = nLastLiteralsOffset - i;
- nBestCost = 8 + cost[i + 1];
- if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == 256 || nLiteralsLen == 512) {
- /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
- * The cost automatically accumulates down the chain. */
- nBestCost += 8;
- }
- if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
- nBestCost += MODESWITCH_PENALTY;
- nBestMatchLen = 0;
- nBestMatchOffset = 0;
-
- lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
- int m;
-
- for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
- int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
-
- if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
- int nCurCost;
- int nMatchLen = pMatch[m].length;
-
- if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
- nMatchLen = nEndOffset - LAST_LITERALS - i;
-
- nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE);
- nCurCost += cost[i + nMatchLen];
- if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
- nCurCost += MODESWITCH_PENALTY;
-
- if (nBestCost > (nCurCost - nFavorRatio)) {
- nBestCost = nCurCost;
- nBestMatchLen = nMatchLen;
- nBestMatchOffset = pMatch[m].offset;
- }
- }
- else {
- int nMatchLen = pMatch[m].length;
- int k, nMatchRunLen;
-
- if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
- nMatchLen = nEndOffset - LAST_LITERALS - i;
-
- nMatchRunLen = nMatchLen;
- if (nMatchRunLen > MATCH_RUN_LEN)
- nMatchRunLen = MATCH_RUN_LEN;
-
- for (k = nMinMatchSize; k < nMatchRunLen; k++) {
- int nCurCost;
-
- nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
- nCurCost += cost[i + k];
- if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
- nCurCost += MODESWITCH_PENALTY;
-
- if (nBestCost > (nCurCost - nFavorRatio)) {
- nBestCost = nCurCost;
- nBestMatchLen = k;
- nBestMatchOffset = pMatch[m].offset;
- }
- }
-
- for (; k <= nMatchLen; k++) {
- int nCurCost;
-
- nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE);
- nCurCost += cost[i + k];
- if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
- nCurCost += MODESWITCH_PENALTY;
-
- if (nBestCost > (nCurCost - nFavorRatio)) {
- nBestCost = nCurCost;
- nBestMatchLen = k;
- nBestMatchOffset = pMatch[m].offset;
- }
- }
- }
- }
-
- if (nBestMatchLen >= MIN_MATCH_SIZE)
- nLastLiteralsOffset = i;
-
- cost[i] = nBestCost;
- pMatch->length = nBestMatchLen;
- pMatch->offset = nBestMatchOffset;
- }
-}
-
-/**
- * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
- * impacting the compression ratio
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- *
- * @return non-zero if the number of tokens was reduced, 0 if it wasn't
- */
-static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
- int i;
- int nNumLiterals = 0;
- int nDidReduce = 0;
-
- for (i = nStartOffset; i < nEndOffset; ) {
- lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-
- if (pMatch->length >= MIN_MATCH_SIZE) {
- int nMatchLen = pMatch->length;
- int nReduce = 0;
-
- if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: */ {
- int nMatchOffset = pMatch->offset;
- int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
- int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
-
- if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) {
- if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nMatchLen))) {
- /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
- * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
- * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
- * match command by literals, the output size will not increase and it will remove one command. */
- nReduce = 1;
- }
- }
- else {
- int nCurIndex = i + nMatchLen;
- int nNextNumLiterals = 0;
-
- do {
- nCurIndex++;
- nNextNumLiterals++;
- } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE);
-
- if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size(nNextNumLiterals))) {
- /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
- * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
- nReduce = 1;
- }
- }
- }
-
- if (nReduce) {
- int j;
-
- for (j = 0; j < nMatchLen; j++) {
- pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
- }
- nNumLiterals += nMatchLen;
- i += nMatchLen;
-
- nDidReduce = 1;
- }
- else {
- if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
- pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
- (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) {
- /* Join */
-
- pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
- pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
- pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
- continue;
- }
-
- nNumLiterals = 0;
- i += nMatchLen;
- }
- }
- else {
- nNumLiterals++;
- i++;
- }
- }
-
- return nDidReduce;
-}
-
-/**
- * Emit block of compressed data
- *
- * @param pCompressor compression context
- * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- * @param pOutData pointer to output buffer
- * @param nMaxOutDataSize maximum size of output buffer, in bytes
- *
- * @return size of compressed data in output buffer, or -1 if the data is uncompressible
- */
-static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
- int i;
- int nNumLiterals = 0;
- int nInFirstLiteralOffset = 0;
- int nOutOffset = 0;
-
- for (i = nStartOffset; i < nEndOffset; ) {
- lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-
- if (pMatch->length >= MIN_MATCH_SIZE) {
- int nMatchOffset = pMatch->offset;
- int nMatchLen = pMatch->length;
- int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
- int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
- int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? MATCH_RUN_LEN : nEncodedMatchLen;
- int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
- int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
-
- if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
- return -1;
- if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
- return -1;
-
- pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
- nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
-
- if (nNumLiterals != 0) {
- memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
- nOutOffset += nNumLiterals;
- nNumLiterals = 0;
- }
-
- pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
- if (nTokenLongOffset) {
- pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
- }
- nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
- i += nMatchLen;
-
- pCompressor->num_commands++;
- }
- else {
- if (nNumLiterals == 0)
- nInFirstLiteralOffset = i;
- nNumLiterals++;
- i++;
- }
- }
-
- {
- int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
- int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3);
-
- if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
- return -1;
-
- if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
- pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
- else
- pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
- nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
-
- if (nNumLiterals != 0) {
- memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
- nOutOffset += nNumLiterals;
- nNumLiterals = 0;
- }
-
- pCompressor->num_commands++;
- }
-
- if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
- /* Emit EOD marker for raw block */
-
- if ((nOutOffset + 4) > nMaxOutDataSize)
- return -1;
-
- pOutData[nOutOffset++] = 0;
- pOutData[nOutOffset++] = 238;
- pOutData[nOutOffset++] = 0;
- pOutData[nOutOffset++] = 0;
- }
-
- return nOutOffset;
-}
-
-/**
- * Compress one block of data
- *
- * @param pCompressor compression context
- * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
- * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
- * @param nInDataSize number of input bytes to compress
- * @param pOutData pointer to output buffer
- * @param nMaxOutDataSize maximum size of output buffer, in bytes
- *
- * @return size of compressed data in output buffer, or -1 if the data is uncompressible
- */
-int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
- if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
- return -1;
- if (nPreviousBlockSize) {
- lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
- }
- lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
- lzsa_optimize_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
-
- int nDidReduce;
- int nPasses = 0;
- do {
- nDidReduce = lzsa_optimize_command_count(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
- nPasses++;
- } while (nDidReduce && nPasses < 20);
-
- return lzsa_write_block(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
-}
-
-/**
- * Get the number of compression commands issued in compressed data blocks
- *
- * @return number of commands
- */
-int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
- return pCompressor->num_commands;
-}
diff --git a/src/shrink_v1.c b/src/shrink_v1.c
new file mode 100644
index 0000000..1ca3918
--- /dev/null
+++ b/src/shrink_v1.c
@@ -0,0 +1,460 @@
+/*
+ * shrink_v1.c - LZSA1 block compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lib.h"
+#include "shrink_v1.h"
+#include "format.h"
+
+/**
+ * Get the number of extra bits required to represent a literals length
+ *
+ * @param nLength literals length
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
+ if (nLength < LITERALS_RUN_LEN_V1) {
+ return 0;
+ }
+ else {
+ if (nLength < 256)
+ return 8;
+ else {
+ if (nLength < 512)
+ return 16;
+ else
+ return 24;
+ }
+ }
+}
+
+/**
+ * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength literals length
+ */
+static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
+ if (nLength >= LITERALS_RUN_LEN_V1) {
+ if (nLength < 256)
+ pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
+ else {
+ if (nLength < 512) {
+ pOutData[nOutOffset++] = 250;
+ pOutData[nOutOffset++] = nLength - 256;
+ }
+ else {
+ pOutData[nOutOffset++] = 249;
+ pOutData[nOutOffset++] = nLength & 0xff;
+ pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
+ }
+ }
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent an encoded match length
+ *
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
+ if (nLength < MATCH_RUN_LEN_V1) {
+ return 0;
+ }
+ else {
+ if ((nLength + MIN_MATCH_SIZE_V1) < 256)
+ return 8;
+ else {
+ if ((nLength + MIN_MATCH_SIZE_V1) < 512)
+ return 16;
+ else
+ return 24;
+ }
+ }
+}
+
+/**
+ * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
+ */
+static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
+ if (nLength >= MATCH_RUN_LEN_V1) {
+ if ((nLength + MIN_MATCH_SIZE_V1) < 256)
+ pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
+ else {
+ if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
+ pOutData[nOutOffset++] = 239;
+ pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256;
+ }
+ else {
+ pOutData[nOutOffset++] = 238;
+ pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
+ pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
+ }
+ }
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ */
+static void lzsa_optimize_matches_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ int *cost = (int*)pCompressor->pos_data; /* Reuse */
+ int nLastLiteralsOffset;
+ int nMinMatchSize = pCompressor->min_match_size;
+ const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+ int i;
+
+ cost[nEndOffset - 1] = 8;
+ nLastLiteralsOffset = nEndOffset;
+
+ for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
+ int nBestCost, nBestMatchLen, nBestMatchOffset;
+
+ int nLiteralsLen = nLastLiteralsOffset - i;
+ nBestCost = 8 + cost[i + 1];
+ if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
+ /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+ * The cost automatically accumulates down the chain. */
+ nBestCost += 8;
+ }
+ if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+ nBestCost += MODESWITCH_PENALTY;
+ nBestMatchLen = 0;
+ nBestMatchOffset = 0;
+
+ lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+ int m;
+
+ for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
+ int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
+
+ if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
+ int nCurCost;
+ int nMatchLen = pMatch[m].length;
+
+ if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+ nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+ nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
+ nCurCost += cost[i + nMatchLen];
+ if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+ nCurCost += MODESWITCH_PENALTY;
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = nMatchLen;
+ nBestMatchOffset = pMatch[m].offset;
+ }
+ }
+ else {
+ int nMatchLen = pMatch[m].length;
+ int k, nMatchRunLen;
+
+ if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+ nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+ nMatchRunLen = nMatchLen;
+ if (nMatchRunLen > MATCH_RUN_LEN_V1)
+ nMatchRunLen = MATCH_RUN_LEN_V1;
+
+ for (k = nMinMatchSize; k < nMatchRunLen; k++) {
+ int nCurCost;
+
+ nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
+ nCurCost += cost[i + k];
+ if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+ nCurCost += MODESWITCH_PENALTY;
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = k;
+ nBestMatchOffset = pMatch[m].offset;
+ }
+ }
+
+ for (; k <= nMatchLen; k++) {
+ int nCurCost;
+
+ nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
+ nCurCost += cost[i + k];
+ if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
+ nCurCost += MODESWITCH_PENALTY;
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = k;
+ nBestMatchOffset = pMatch[m].offset;
+ }
+ }
+ }
+ }
+
+ if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
+ nLastLiteralsOffset = i;
+
+ cost[i] = nBestCost;
+ pMatch->length = nBestMatchLen;
+ pMatch->offset = nBestMatchOffset;
+ }
+}
+
+/**
+ * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
+ * impacting the compression ratio
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ *
+ * @return non-zero if the number of tokens was reduced, 0 if it wasn't
+ */
+static int lzsa_optimize_command_count_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ int i;
+ int nNumLiterals = 0;
+ int nDidReduce = 0;
+
+ for (i = nStartOffset; i < nEndOffset; ) {
+ lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+
+ if (pMatch->length >= MIN_MATCH_SIZE_V1) {
+ int nMatchLen = pMatch->length;
+ int nReduce = 0;
+
+ if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducible command size */ {
+ int nMatchOffset = pMatch->offset;
+ int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
+ int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
+
+ if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
+ if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
+ /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
+ * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
+ * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
+ * match command by literals, the output size will not increase and it will remove one command. */
+ nReduce = 1;
+ }
+ }
+ else {
+ int nCurIndex = i + nMatchLen;
+ int nNextNumLiterals = 0;
+
+ do {
+ nCurIndex++;
+ nNextNumLiterals++;
+ } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
+
+ if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
+ /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
+ * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
+ nReduce = 1;
+ }
+ }
+ }
+
+ if (nReduce) {
+ int j;
+
+ for (j = 0; j < nMatchLen; j++) {
+ pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
+ }
+ nNumLiterals += nMatchLen;
+ i += nMatchLen;
+
+ nDidReduce = 1;
+ }
+ else {
+ if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
+ pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
+ (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) {
+ /* Join */
+
+ pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
+ pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
+ pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
+ continue;
+ }
+
+ nNumLiterals = 0;
+ i += nMatchLen;
+ }
+ }
+ else {
+ nNumLiterals++;
+ i++;
+ }
+ }
+
+ return nDidReduce;
+}
+
+/**
+ * Emit block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int lzsa_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
+ int i;
+ int nNumLiterals = 0;
+ int nInFirstLiteralOffset = 0;
+ int nOutOffset = 0;
+
+ for (i = nStartOffset; i < nEndOffset; ) {
+ lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+
+ if (pMatch->length >= MIN_MATCH_SIZE_V1) {
+ int nMatchOffset = pMatch->offset;
+ int nMatchLen = pMatch->length;
+ int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
+ int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
+ int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
+ int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
+ int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
+
+ if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
+ return -1;
+ if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
+ return -1;
+
+ pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
+ nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
+
+ if (nNumLiterals != 0) {
+ memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+ nOutOffset += nNumLiterals;
+ nNumLiterals = 0;
+ }
+
+ pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+ if (nTokenLongOffset) {
+ pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
+ }
+ nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
+ i += nMatchLen;
+
+ pCompressor->num_commands++;
+ }
+ else {
+ if (nNumLiterals == 0)
+ nInFirstLiteralOffset = i;
+ nNumLiterals++;
+ i++;
+ }
+ }
+
+ {
+ int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
+ int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
+
+ if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
+ return -1;
+
+ if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
+ pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
+ else
+ pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
+ nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
+
+ if (nNumLiterals != 0) {
+ memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+ nOutOffset += nNumLiterals;
+ nNumLiterals = 0;
+ }
+
+ pCompressor->num_commands++;
+ }
+
+ if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
+ /* Emit EOD marker for raw block */
+
+ if ((nOutOffset + 4) > nMaxOutDataSize)
+ return -1;
+
+ pOutData[nOutOffset++] = 0;
+ pOutData[nOutOffset++] = 238;
+ pOutData[nOutOffset++] = 0;
+ pOutData[nOutOffset++] = 0;
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+ lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+ int nDidReduce;
+ int nPasses = 0;
+ do {
+ nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+ nPasses++;
+ } while (nDidReduce && nPasses < 20);
+
+ return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
+}
diff --git a/src/shrink_v1.h b/src/shrink_v1.h
new file mode 100644
index 0000000..aa1a7d5
--- /dev/null
+++ b/src/shrink_v1.h
@@ -0,0 +1,53 @@
+/*
+ * shrink_v1.h - LZSA1 block compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_V1_H
+#define _SHRINK_V1_H
+
+/* Forward declarations */
+typedef struct _lsza_compressor lsza_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
+ * @param nInDataSize number of input bytes to compress
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_V1_H */
diff --git a/src/shrink_v2.c b/src/shrink_v2.c
new file mode 100644
index 0000000..27b8d76
--- /dev/null
+++ b/src/shrink_v2.c
@@ -0,0 +1,733 @@
+/*
+ * shrink_v2.c - LZSA2 block compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include "lib.h"
+#include "shrink_v2.h"
+#include "format.h"
+
+/**
+ * Write 4-bit nibble to output (compressed) buffer
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles
+ * @param nCurFreeNibbles current number of free nibbles in byte
+ * @param nNibbleValue value to write (0..15)
+ */
+static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) {
+ if (nOutOffset < 0) return -1;
+
+ if ((*nCurNibbleOffset) == -1) {
+ if (nOutOffset >= nMaxOutDataSize) return -1;
+ (*nCurNibbleOffset) = nOutOffset;
+ (*nCurFreeNibbles) = 2;
+ pOutData[nOutOffset++] = 0;
+ }
+
+ pOutData[*nCurNibbleOffset] = (pOutData[*nCurNibbleOffset] << 4) | (nNibbleValue & 0x0f);
+ (*nCurFreeNibbles)--;
+ if ((*nCurFreeNibbles) == 0) {
+ (*nCurNibbleOffset) = -1;
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent a literals length
+ *
+ * @param nLength literals length
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_literals_varlen_size_v2(const int nLength) {
+ if (nLength < LITERALS_RUN_LEN_V2) {
+ return 0;
+ }
+ else {
+ if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
+ return 4;
+ }
+ else {
+ if (nLength < 256)
+ return 4+8;
+ else {
+ return 4+24;
+ }
+ }
+ }
+}
+
+/**
+ * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength literals length
+ */
+static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
+ if (nLength >= LITERALS_RUN_LEN_V2) {
+ if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
+ nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2);
+ }
+ else {
+ nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
+ if (nOutOffset < 0) return -1;
+
+ if (nLength < 256)
+ pOutData[nOutOffset++] = nLength;
+ else {
+ pOutData[nOutOffset++] = 0;
+ pOutData[nOutOffset++] = nLength & 0xff;
+ pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
+ }
+ }
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Get the number of extra bits required to represent an encoded match length
+ *
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
+ *
+ * @return number of extra bits required
+ */
+static inline int lzsa_get_match_varlen_size_v2(const int nLength) {
+ if (nLength < MATCH_RUN_LEN_V2) {
+ return 0;
+ }
+ else {
+ if (nLength < (MATCH_RUN_LEN_V2 + 15))
+ return 4;
+ else {
+ if ((nLength + MIN_MATCH_SIZE_V2) < 256)
+ return 4+8;
+ else {
+ return 4 + 24;
+ }
+ }
+ }
+}
+
+/**
+ * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
+ * room to write the bytes.
+ *
+ * @param pOutData pointer to output buffer
+ * @param nOutOffset current write index into output buffer
+ * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
+ */
+static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
+ if (nLength >= MATCH_RUN_LEN_V2) {
+ if (nLength < (MATCH_RUN_LEN_V2 + 15)) {
+ nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2);
+ }
+ else {
+ nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
+ if (nOutOffset < 0) return -1;
+
+ if ((nLength + MIN_MATCH_SIZE_V2) < 256)
+ pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2;
+ else {
+ pOutData[nOutOffset++] = 0;
+ pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff;
+ pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff;
+ }
+ }
+ }
+
+ return nOutOffset;
+}
+
+/**
+ * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
+ */
+static void lzsa_optimize_matches_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+ int *cost = (int*)pCompressor->pos_data; /* Reuse */
+ int *prev_match = (int*)pCompressor->intervals; /* Reuse */
+ lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
+ lzsa_match *pBestMatch = pCompressor->best_match;
+ int nLastLiteralsOffset;
+ int nMinMatchSize = pCompressor->min_match_size;
+ const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+ int i;
+
+ cost[nEndOffset - 1] = 8;
+ prev_match[nEndOffset - 1] = nEndOffset;
+ nLastLiteralsOffset = nEndOffset;
+
+ pCompressor->best_match[nEndOffset - 1].length = 0;
+ pCompressor->best_match[nEndOffset - 1].offset = 0;
+
+ repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
+ repmatch_opt[nEndOffset - 1].incoming_offset = -1;
+ repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
+
+ for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
+ int nLiteralsCost;
+
+ int nLiteralsLen = nLastLiteralsOffset - i;
+ nLiteralsCost = 8 + cost[i + 1];
+ if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
+ /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+ * The cost automatically accumulates down the chain. */
+ nLiteralsCost += 4;
+ }
+ else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
+ /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+ * The cost automatically accumulates down the chain. */
+ nLiteralsCost += 8;
+ }
+ else if (nLiteralsLen == 256) {
+ /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
+ * The cost automatically accumulates down the chain. */
+ nLiteralsCost += 16;
+ }
+ if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
+ nLiteralsCost += MODESWITCH_PENALTY;
+
+ lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
+ int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
+ int m;
+
+ cost[i] = nLiteralsCost;
+ pCompressor->best_match[i].length = 0;
+ pCompressor->best_match[i].offset = 0;
+
+ repmatch_opt[i].best_slot_for_incoming = -1;
+ repmatch_opt[i].incoming_offset = -1;
+ repmatch_opt[i].expected_repmatch = 0;
+
+ for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
+ int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
+
+ nBestCost = nLiteralsCost;
+ nBestMatchLen = 0;
+ nBestMatchOffset = 0;
+ nBestUpdatedSlot = -1;
+ nBestUpdatedIndex = -1;
+ nBestExpectedRepMatch = 0;
+
+ if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
+ int nCurCost;
+ int nMatchLen = pMatch[m].length;
+
+ if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+ nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+ int nCurIndex = prev_match[i + nMatchLen];
+
+ int nMatchOffsetSize = 0;
+ int nCurExpectedRepMatch = 1;
+ if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+ pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+ nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+ nCurExpectedRepMatch = 0;
+ }
+
+ nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
+ nCurCost += cost[i + nMatchLen];
+ if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
+ nCurCost += MODESWITCH_PENALTY;
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = nMatchLen;
+ nBestMatchOffset = pMatch[m].offset;
+ nBestUpdatedSlot = -1;
+ nBestUpdatedIndex = -1;
+ nBestExpectedRepMatch = nCurExpectedRepMatch;
+ }
+ }
+ else {
+ int nMatchLen = pMatch[m].length;
+ int k, nMatchRunLen;
+
+ if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
+ nMatchLen = nEndOffset - LAST_LITERALS - i;
+
+ nMatchRunLen = nMatchLen;
+ if (nMatchRunLen > MATCH_RUN_LEN_V2)
+ nMatchRunLen = MATCH_RUN_LEN_V2;
+
+ for (k = nMinMatchSize; k < nMatchRunLen; k++) {
+ int nCurCost;
+
+ int nCurIndex = prev_match[i + k];
+ int nMatchOffsetSize = 0;
+ int nCurExpectedRepMatch = 1;
+ if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+ pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+ nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+ nCurExpectedRepMatch = 0;
+ }
+
+ nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
+ nCurCost += cost[i + k];
+ if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
+ nCurCost += MODESWITCH_PENALTY;
+
+ int nCurUpdatedSlot = -1;
+ int nCurUpdatedIndex = -1;
+
+ if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
+ int r;
+
+ for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
+ if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
+ int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
+
+ if (nAltCost <= nCurCost) {
+ nCurUpdatedSlot = r;
+ nCurUpdatedIndex = nCurIndex;
+ nCurCost = nAltCost;
+ nCurExpectedRepMatch = 2;
+ }
+ }
+ }
+ }
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = k;
+ nBestMatchOffset = pMatch[m].offset;
+ nBestUpdatedSlot = nCurUpdatedSlot;
+ nBestUpdatedIndex = nCurUpdatedIndex;
+ nBestExpectedRepMatch = nCurExpectedRepMatch;
+ }
+ }
+
+ for (; k <= nMatchLen; k++) {
+ int nCurCost;
+
+ int nCurIndex = prev_match[i + k];
+ int nMatchOffsetSize = 0;
+ int nCurExpectedRepMatch = 1;
+ if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+ pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
+ nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
+ nCurExpectedRepMatch = 0;
+ }
+
+ nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
+ nCurCost += cost[i + k];
+ if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
+ nCurCost += MODESWITCH_PENALTY;
+
+ int nCurUpdatedSlot = -1;
+ int nCurUpdatedIndex = -1;
+
+ if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
+ int r;
+
+ for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
+ if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
+ int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
+
+ if (nAltCost <= nCurCost) {
+ nCurUpdatedSlot = r;
+ nCurUpdatedIndex = nCurIndex;
+ nCurCost = nAltCost;
+ nCurExpectedRepMatch = 2;
+ }
+ }
+ }
+ }
+
+ if (nBestCost > (nCurCost - nFavorRatio)) {
+ nBestCost = nCurCost;
+ nBestMatchLen = k;
+ nBestMatchOffset = pMatch[m].offset;
+ nBestUpdatedSlot = nCurUpdatedSlot;
+ nBestUpdatedIndex = nCurUpdatedIndex;
+ nBestExpectedRepMatch = nCurExpectedRepMatch;
+ }
+ }
+ }
+
+ pSlotCost[m] = nBestCost;
+ pMatch[m].length = nBestMatchLen;
+ pMatch[m].offset = nBestMatchOffset; /* not necessary */
+
+ if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
+ cost[i] = nBestCost;
+ pCompressor->best_match[i].length = nBestMatchLen;
+ pCompressor->best_match[i].offset = nBestMatchOffset;
+
+ repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
+
+ if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
+ repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
+ repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
+ }
+ }
+ }
+ for (; m < NMATCHES_PER_OFFSET; m++) {
+ pSlotCost[m] = 0;
+ }
+
+ if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
+ nLastLiteralsOffset = i;
+
+ prev_match[i] = nLastLiteralsOffset;
+ }
+
+ int nIncomingOffset = -1;
+ for (i = nStartOffset; i < nEndOffset; ) {
+ if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
+ if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
+ lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
+ int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
+
+ pCompressor->best_match[i].length = pMatch->length;
+ pCompressor->best_match[i].offset = pMatch->offset;
+ cost[i] = *pSlotCost;
+
+ if (repmatch_opt[i].expected_repmatch == 2)
+ repmatch_opt[i].expected_repmatch = 1;
+ }
+ else {
+ if (repmatch_opt[i].expected_repmatch == 2)
+ repmatch_opt[i].expected_repmatch = 0;
+ }
+
+ nIncomingOffset = i;
+ i += pCompressor->best_match[i].length;
+ }
+ else {
+ i++;
+ }
+ }
+}
+
+/**
+ * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
+ * impacting the compression ratio
+ *
+ * @param pCompressor compression context
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window, in bytes)
+ *
+ * @return non-zero if the number of tokens was reduced, 0 if it wasn't
+ */
+static int lzsa_optimize_command_count_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+   int i;
+   int nNumLiterals = 0;
+   int nDidReduce = 0;
+   int nPreviousMatchOffset = -1;
+   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->best_match + i;
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
+         int nMatchLen = pMatch->length;
+         int nReduce = 0;
+         int nCurrentMatchOffset = i;
+
+         if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* maximum match length that a replacing literal run could ever pay for */ {
+            int nMatchOffset = pMatch->offset;
+            int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
+            /* If the previous match command was credited with a rep-match saving, removing this
+             * command invalidates that saving; account for the 16 bits it would give back. */
+            int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16;
+
+            if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
+               /* This match is immediately followed by another match (no literals in between). */
+               int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
+
+               if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) {
+                  nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
+               }
+
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
+                  /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
+                   * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
+                   * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
+                   * match command by literals, the output size will not increase and it will remove one command. */
+                  nReduce = 1;
+               }
+            }
+            else {
+               /* This match is followed by a literal run, then either another match or the end of input. */
+               int nCurIndex = i + nMatchLen;
+               int nNextNumLiterals = 0;
+               int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
+
+               /* Count the literals separating this match from the next command. */
+               do {
+                  nCurIndex++;
+                  nNextNumLiterals++;
+               } while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2);
+
+               /* NOTE(review): the 'length < MIN_MATCH_SIZE_V2' term looks unreachable when
+                * nCurIndex < nEndOffset given the loop's exit condition — kept for safety. */
+               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
+                  pCompressor->best_match[nCurIndex].offset != nMatchOffset) {
+                  nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
+               }
+
+               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
+                  /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
+                   * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
+                  nReduce = 1;
+               }
+            }
+         }
+
+         if (nReduce) {
+            int j;
+
+            /* Turn the match bytes back into literals. */
+            for (j = 0; j < nMatchLen; j++) {
+               pCompressor->best_match[i + j].length = 0;
+            }
+            nNumLiterals += nMatchLen;
+            i += nMatchLen;
+
+            nDidReduce = 1;
+
+            /* The previous match can no longer be followed by a rep-match. */
+            if (nPreviousMatchOffset >= 0) {
+               repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
+               nPreviousMatchOffset = -1;
+            }
+         }
+         else {
+            if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
+               pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
+               (nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_OFFSET) {
+               /* Join two consecutive same-offset periodic matches into one longer match. */
+
+               pMatch->length += pCompressor->best_match[i + nMatchLen].length;
+               pCompressor->best_match[i + nMatchLen].offset = 0;
+               pCompressor->best_match[i + nMatchLen].length = -1;
+               continue;
+            }
+
+            nNumLiterals = 0;
+            i += nMatchLen;
+         }
+
+         nPreviousMatchOffset = nCurrentMatchOffset;
+      }
+      else {
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   return nDidReduce;
+}
+
+/**
+ * Emit block of compressed data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
+ * @param nEndOffset offset to end finding matches at (typically the size of the total input window, in bytes)
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+static int lzsa_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
+   int i;
+   int nNumLiterals = 0;
+   int nInFirstLiteralOffset = 0;
+   int nOutOffset = 0;
+   int nCurNibbleOffset = -1, nCurFreeNibbles = 0;   /* state for the shared nibble store */
+   int nRepMatchOffset = 0;                          /* last offset emitted, for rep-match tokens */
+
+   for (i = nStartOffset; i < nEndOffset; ) {
+      lzsa_match *pMatch = pCompressor->best_match + i;
+
+      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
+         int nMatchOffset = pMatch->offset;
+         int nMatchLen = pMatch->length;
+         int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
+         int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
+         int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen;
+         int nTokenOffsetMode;
+         int nOffsetSize;
+
+         /* Select the offset encoding mode and its size in bits (0 = rep-match). */
+         if (nMatchOffset == nRepMatchOffset) {
+            nTokenOffsetMode = 0xe0;
+            nOffsetSize = 0;
+         }
+         else {
+            if (nMatchOffset <= 32) {
+               nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
+               nOffsetSize = 4;
+            }
+            else if (nMatchOffset <= 512) {
+               nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
+               nOffsetSize = 8;
+            }
+            else if (nMatchOffset <= (8192 + 512)) {
+               nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
+               nOffsetSize = 12;
+            }
+            else {
+               nTokenOffsetMode = 0xc0;
+               nOffsetSize = 16;
+            }
+         }
+
+         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
+
+         /* Bail out (data is uncompressible) rather than overflow the output buffer. */
+         if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
+            return -1;
+         if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
+            return -1;
+
+         pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen;
+         nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
+         if (nOutOffset < 0) return -1;
+
+         /* Copy the pending literal run before the match data. */
+         if (nNumLiterals != 0) {
+            memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+            nOutOffset += nNumLiterals;
+            nNumLiterals = 0;
+         }
+
+         /* Emit the offset in the format selected above (nibble / byte / byte+nibble / word). */
+         if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
+            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
+            if (nOutOffset < 0) return -1;
+         }
+         else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
+            pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+         }
+         else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
+            pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
+            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
+            if (nOutOffset < 0) return -1;
+         }
+         else if (nTokenOffsetMode == 0xc0) {
+            pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
+            pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
+         }
+         nRepMatchOffset = nMatchOffset;
+
+         nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
+         if (nOutOffset < 0) return -1;
+
+         i += nMatchLen;
+
+         pCompressor->num_commands++;
+      }
+      else {
+         /* Literal byte: remember where the run started, flushed with the next match. */
+         if (nNumLiterals == 0)
+            nInFirstLiteralOffset = i;
+         nNumLiterals++;
+         i++;
+      }
+   }
+
+   {
+      /* Final command: flush any trailing literals with a literals-only token. */
+      int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
+      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);
+
+      if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
+         return -1;
+
+      if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47;
+      else
+         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00;
+      nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
+      if (nOutOffset < 0) return -1;
+
+      if (nNumLiterals != 0) {
+         memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
+         nOutOffset += nNumLiterals;
+         nNumLiterals = 0;
+      }
+
+      pCompressor->num_commands++;
+   }
+
+   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
+      /* Emit EOD marker for raw block */
+
+      if (nOutOffset >= nMaxOutDataSize)
+         return -1;
+      pOutData[nOutOffset++] = 0; /* Match offset */
+
+      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15); /* Extended match length nibble */
+      if (nOutOffset < 0) return -1;
+
+      if ((nOutOffset + 3) > nMaxOutDataSize)
+         return -1;
+
+      pOutData[nOutOffset++] = 0; /* Extended match length byte */
+      pOutData[nOutOffset++] = 0; /* 16-bit match length */
+      pOutData[nOutOffset++] = 0;
+   }
+
+   /* Flush a half-filled nibble store, if any; it must be fully drained on exit. */
+   if (nCurNibbleOffset != -1) {
+      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0);
+      if (nOutOffset < 0 || nCurNibbleOffset != -1)
+         return -1;
+   }
+
+   return nOutOffset;
+}
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
+ * @param nInDataSize size of the data to compress, in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
+   /* Pick the optimal set of matches for the block. */
+   lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+   int nDidReduce;
+   int nPasses = 0;
+   /* Iterate token-count reduction to a fixed point; the pass cap guarantees termination. */
+   do {
+      nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+      nPasses++;
+   } while (nDidReduce && nPasses < 20);
+
+   /* Serialize the selected matches and literals; returns -1 if uncompressible. */
+   return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
+}
diff --git a/src/shrink_v2.h b/src/shrink_v2.h
new file mode 100644
index 0000000..8817810
--- /dev/null
+++ b/src/shrink_v2.h
@@ -0,0 +1,53 @@
+/*
+ * shrink_v2.h - LZSA2 block compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ * claim that you wrote the original software. If you use this software
+ * in a product, an acknowledgment in the product documentation would be
+ * appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ * misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_V2_H
+#define _SHRINK_V2_H
+
+/* Forward declarations */
+typedef struct _lsza_compressor lsza_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
+ * @param nInDataSize size of the data to compress, in bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_V2_H */