diff --git a/LICENSE b/LICENSE index 32f668c..29b28c1 100644 --- a/LICENSE +++ b/LICENSE @@ -1,3 +1,3 @@ -The LZSA code is available under the Zlib license, except for src/shrink.c which is placed under the Creative Commons CC0 license. +The LZSA code is available under the Zlib license, except for src/matchfinder.c which is placed under the Creative Commons CC0 license. Please consult LICENSE.zlib.md and LICENSE.CC0.md for more information. diff --git a/Makefile b/Makefile index c04753b..a9d3601 100755 --- a/Makefile +++ b/Makefile @@ -10,10 +10,14 @@ $(OBJDIR)/%.o: src/../%.c APP := lzsa -OBJS := $(OBJDIR)/src/main.o +OBJS := $(OBJDIR)/src/lzsa.o OBJS += $(OBJDIR)/src/frame.o -OBJS += $(OBJDIR)/src/shrink.o -OBJS += $(OBJDIR)/src/expand.o +OBJS += $(OBJDIR)/src/lib.o +OBJS += $(OBJDIR)/src/matchfinder.o +OBJS += $(OBJDIR)/src/shrink_v1.o +OBJS += $(OBJDIR)/src/shrink_v2.o +OBJS += $(OBJDIR)/src/expand_v1.o +OBJS += $(OBJDIR)/src/expand_v2.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o diff --git a/README.md b/README.md index 08abb66..790612a 100755 --- a/README.md +++ b/README.md @@ -40,7 +40,7 @@ Inspirations: License: * The LZSA code is available under the Zlib license. -* The compressor (shrink.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder. +* The match finder (matchfinder.c) is available under the CC0 license due to using portions of code from Eric Bigger's Wimlib in the suffix array-based matchfinder. # Stream format diff --git a/VS2017/lzsa.vcxproj b/VS2017/lzsa.vcxproj index 8461495..89325d5 100755 --- a/VS2017/lzsa.vcxproj +++ b/VS2017/lzsa.vcxproj @@ -177,24 +177,32 @@ - + + + - + + + - + + + - - + + + + diff --git a/VS2017/lzsa.vcxproj.filters b/VS2017/lzsa.vcxproj.filters index 6767357..511e8d4 100755 --- a/VS2017/lzsa.vcxproj.filters +++ b/VS2017/lzsa.vcxproj.filters @@ -27,15 +27,9 @@ Fichiers d%27en-tĂȘte - - Fichiers sources - Fichiers sources - - Fichiers sources - Fichiers sources\libdivsufsort\include @@ -48,17 +42,26 @@ Fichiers sources + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + - - Fichiers sources - - - Fichiers sources - - - Fichiers sources - Fichiers sources\libdivsufsort\lib @@ -74,5 +77,26 @@ Fichiers sources + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + + + Fichiers sources + \ No newline at end of file diff --git a/asm/6502/decompress.asm b/asm/6502/decompress_v1.asm similarity index 95% rename from asm/6502/decompress.asm rename to asm/6502/decompress_v1.asm index f359d15..9289d32 100755 --- a/asm/6502/decompress.asm +++ b/asm/6502/decompress_v1.asm @@ -1,5 +1,5 @@ ; ----------------------------------------------------------------------------- -; Decompress raw LZSA block. Create one with lzsa -r +; Decompress raw LZSA1 block. 
Create one with lzsa -r ; ; in: ; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address @@ -31,7 +31,7 @@ OFFSLO = $43 ; zero-page location for temp offset OFFSHI = $44 -DECOMPRESS_LZSA +DECOMPRESS_LZSA1 LDY #$00 DECODE_TOKEN diff --git a/asm/6502/decompress_v2.asm b/asm/6502/decompress_v2.asm new file mode 100755 index 0000000..7c950ae --- /dev/null +++ b/asm/6502/decompress_v2.asm @@ -0,0 +1,245 @@ +; ----------------------------------------------------------------------------- +; Decompress raw LZSA2 block. +; Create one with lzsa -r -f2 +; +; in: +; * LZSA_SRC_LO and LZSA_SRC_HI contain the compressed raw block address +; * LZSA_DST_LO and LZSA_DST_HI contain the destination buffer address +; +; out: +; * LZSA_DST_LO and LZSA_DST_HI contain the last decompressed byte address, +1 +; ----------------------------------------------------------------------------- +; +; Copyright (C) 2019 Emmanuel Marty +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. +; ----------------------------------------------------------------------------- + +OFFSLO = $43 ; zero-page location for temp offset +OFFSHI = $44 +FIXUP = $4B +NIBBLES = $FB +NIBCOUNT = $FC + +DECOMPRESS_LZSA2 + LDY #$00 + STY NIBBLES + STY NIBCOUNT + +DECODE_TOKEN + JSR GETSRC ; read token byte: XYZ|LL|MMM + PHA ; preserve token on stack + + AND #$18 ; isolate literals count (LL) + BEQ NO_LITERALS ; skip if no literals to copy + CMP #$18 ; LITERALS_RUN_LEN_V2 << 3? + BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token + + JSR GETNIBBLE ; get extra literals length nibble + CLC ; add nibble to len from token + ADC #$03 ; (LITERALS_RUN_LEN_V2) + CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ? + BNE PREPARE_COPY_LITERALS ; if less, literals count is complete + + JSR GETSRC ; get extra byte of variable literals count + TAX ; non-zero? 
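+ ; (note, as implemented in this decoder: a non-zero extra byte is
+ ; the complete literals count on its own, while a zero byte means
+ ; a full 16-bit literals count follows in the stream)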
+ BNE PREPARE_COPY_LITERALS_HIGH ; if so, literals count is complete + + ; handle 16 bits literals count + ; literals count = directly these 16 bits + JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A + TAY ; put high 8 bits in Y + JMP PREPARE_COPY_LITERALS_HIGH + +EMBEDDED_LITERALS + LSR A ; shift literals count into place + LSR A + LSR A + +PREPARE_COPY_LITERALS + TAX +PREPARE_COPY_LITERALS_HIGH + INY + +COPY_LITERALS + JSR GETPUT ; copy one byte of literals + DEX + BNE COPY_LITERALS + DEY + BNE COPY_LITERALS + +NO_LITERALS + PLA ; retrieve token from stack + PHA ; preserve token again + BMI REPMATCH_OR_LARGE_OFFSET ; 1YZ: rep-match or 13/16 bit offset + + ASL ; 0YZ: 5 or 9 bit offset + BMI OFFSET_9_BIT + + ; 00Z: 5 bit offset + LSR A ; Shift Z (offset bit 4) in place + LSR A + AND #$10 + STA FIXUP + + JSR GETNIBBLE ; get nibble for offset bits 0-3 + ORA FIXUP ; merge offset bit 4 + ORA #$E0 ; set offset bits 7-5 to 1 + TAX ; store low byte of match offset + LDA #$0FF ; set offset bits 15-8 to 1 + BNE GOT_OFFSET ; (*same as JMP GOT_OFFSET but shorter) + +OFFSET_9_BIT ; 01Z: 9 bit offset + ASL ; shift Z (offset bit 8) in place + ROL + ROL + ORA #$FE ; set offset bits 15-9 to 1 + STA OFFSHI + + JSR GETSRC ; get offset bits 0-7 from stream in A + TAX ; store low byte of match offset + JMP GOT_OFFSET_LO ; go prepare match + +REPMATCH_OR_LARGE_OFFSET + ASL ; 13 bit offset? + BMI REPMATCH_OR_16_BIT ; handle rep-match or 16-bit offset if not + + ; 10Z: 13 bit offset + + LSR A ; shift Z (offset bit 4) in place + LSR A + AND #$10 + STA FIXUP + + JSR GETSRC ; get offset bits 0-7 from stream in A + TAX ; store low byte of match offset + + JSR GETNIBBLE ; get nibble for offset bits 8-11 + ORA FIXUP ; merge offset bit 12 + CLC + ADC #$DE ; set bits 13-15 to 1 and substract 2 (to substract 512) + BNE GOT_OFFSET ; go prepare match (*same as JMP GOT_OFFSET but shorter) + +REPMATCH_OR_16_BIT ; rep-match or 16 bit offset + ASL ; XYZ=111? + BMI REP_MATCH ; reuse previous offset if so (rep-match) + + ; 110: handle 16 bit offset + JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A + +GOT_OFFSET + STA OFFSHI ; store final match offset +GOT_OFFSET_LO + STX OFFSLO + +REP_MATCH + CLC ; add dest + match offset + LDA PUTDST+1 ; low 8 bits + ADC OFFSLO + STA COPY_MATCH_LOOP+1 ; store back reference address + LDA OFFSHI ; high 8 bits + ADC PUTDST+2 + STA COPY_MATCH_LOOP+2 ; store high 8 bits of address + + PLA ; retrieve token from stack again + AND #$07 ; isolate match len (MMM) + CLC + ADC #$02 ; add MIN_MATCH_SIZE_V2 + CMP #$09 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2? + BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token + + JSR GETNIBBLE ; get extra match length nibble + CLC ; add nibble to len from token + ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) + CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15? + BNE PREPARE_COPY_MATCH ; if less, match length is complete + + JSR GETSRC ; get extra byte of variable match length + TAX ; non-zero? + BNE PREPARE_COPY_MATCH_Y ; if so, the match length is complete + + ; Handle 16 bits match length + JSR GETLARGESRC ; grab low 8 bits in X, high 8 bits in A + TAY ; put high 8 bits in Y + ; large match length with zero high byte? 
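+ ; (a 16-bit match length is normally only emitted for lengths of
+ ; 256 and up, so a zero high byte can only be the end-of-data code)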
+ BEQ DECOMPRESSION_DONE ; if so, this is the EOD code, bail + TXA + +PREPARE_COPY_MATCH + TAX +PREPARE_COPY_MATCH_Y + INY + +COPY_MATCH_LOOP + LDA $AAAA ; get one byte of backreference + INC COPY_MATCH_LOOP+1 + BNE GETMATCH_DONE + INC COPY_MATCH_LOOP+2 +GETMATCH_DONE + JSR PUTDST ; copy to destination + DEX + BNE COPY_MATCH_LOOP + DEY + BNE COPY_MATCH_LOOP + JMP DECODE_TOKEN + +GETNIBBLE + DEC NIBCOUNT + BPL HAS_NIBBLES + + LDA #$01 + STA NIBCOUNT + JSR GETSRC ; get 2 nibbles + STA NIBBLES + LSR A + LSR A + LSR A + LSR A + RTS + +HAS_NIBBLES + LDA NIBBLES + AND #$0F ; isolate low 4 bits of nibble + RTS + +GETPUT + JSR GETSRC +PUTDST +LZSA_DST_LO = *+1 +LZSA_DST_HI = *+2 + STA $AAAA + INC PUTDST+1 + BNE PUTDST_DONE + INC PUTDST+2 +PUTDST_DONE +DECOMPRESSION_DONE + RTS + +GETLARGESRC + JSR GETSRC ; grab low 8 bits + TAX ; move to X + ; fall through grab high 8 bits + +GETSRC +LZSA_SRC_LO = *+1 +LZSA_SRC_HI = *+2 + LDA $AAAA + INC GETSRC+1 + BNE GETSRC_DONE + INC GETSRC+2 +GETSRC_DONE + RTS diff --git a/asm/8088/decompress_small.S b/asm/8088/decompress_small_v1.S similarity index 95% rename from asm/8088/decompress_small.S rename to asm/8088/decompress_small_v1.S index 813e4fd..e691dde 100755 --- a/asm/8088/decompress_small.S +++ b/asm/8088/decompress_small_v1.S @@ -22,15 +22,15 @@ bits 16 ; --------------------------------------------------------------------------- -; Decompress raw LZSA block +; Decompress raw LZSA1 block ; inputs: -; * ds:si: raw LZSA block +; * ds:si: raw LZSA1 block ; * es:di: output buffer ; output: ; * ax: decompressed size ; --------------------------------------------------------------------------- -lzsa_decompress: +lzsa1_decompress: push di ; remember decompression offset cld ; make string operations (lods, movs, stos..) move forward diff --git a/asm/8088/decompress_small_v2.S b/asm/8088/decompress_small_v2.S new file mode 100755 index 0000000..61e99f7 --- /dev/null +++ b/asm/8088/decompress_small_v2.S @@ -0,0 +1,174 @@ +; decompress_small.S - space-efficient decompressor implementation for 8088 +; +; Copyright (C) 2019 Emmanuel Marty +; +; This software is provided 'as-is', without any express or implied +; warranty. In no event will the authors be held liable for any damages +; arising from the use of this software. +; +; Permission is granted to anyone to use this software for any purpose, +; including commercial applications, and to alter it and redistribute it +; freely, subject to the following restrictions: +; +; 1. The origin of this software must not be misrepresented; you must not +; claim that you wrote the original software. If you use this software +; in a product, an acknowledgment in the product documentation would be +; appreciated but is not required. +; 2. Altered source versions must be plainly marked as such, and must not be +; misrepresented as being the original software. +; 3. This notice may not be removed or altered from any source distribution. + + segment .text + bits 16 + +; --------------------------------------------------------------------------- +; Decompress raw LZSA2 block +; inputs: +; * ds:si: raw LZSA2 block +; * es:di: output buffer +; output: +; * ax: decompressed size +; --------------------------------------------------------------------------- + +lzsa2_decompress: + push di ; remember decompression offset + cld ; make string operations (lods, movs, stos..) 
move forward + + xor cx,cx + xor bx,bx + xor bp,bp + +.decode_token: + mov ax,cx ; clear ah - cx is zero from above or from after rep movsb in .copy_match + lodsb ; read token byte: XYZ|LL|MMMM + mov dx,ax ; keep token in dl + + and al,018H ; isolate literals length in token (LL) + mov cl,3 + shr al,cl ; shift literals length into place + + cmp al,03H ; LITERALS_RUN_LEN_V2? + jne .got_literals ; no, we have the full literals count from the token, go copy + + call .get_nibble ; get extra literals length nibble + add al,cl ; add len from token to nibble + cmp al,012H ; LITERALS_RUN_LEN_V2 + 15 ? + jne .got_literals ; if not, we have the full literals count, go copy + + lodsb ; grab extra length byte + test al,al ; zero? + jne .got_literals ; if not, we have the full literals count, go copy + + lodsw ; grab 16-bit extra length + +.got_literals: + xchg cx,ax + rep movsb ; copy cx literals from ds:si to es:di + + test dl,dl ; check match offset mode in token (X bit) + js .rep_match_or_large_offset + + cmp dl,040H ; check if this is a 5 or 9-bit offset (Y bit) + jnb .offset_9_bit + + ; 5 bit offset + xchg ax,cx ; clear ah - cx is zero from the rep movsb above + mov al,020H ; shift Z (offset bit 4) in place + and al,dl + shr al,1 + call .get_nibble ; get nibble for offset bits 0-3 + or al,cl ; merge nibble + or al,0E0H ; set offset bits 7-5 to 1 + dec ah ; set offset bits 15-8 to 1 + jmp short .get_match_length + +.offset_9_bit: ; 9 bit offset + xchg ax,cx ; clear ah - cx is zero from the rep movsb above + lodsb ; get 8 bit offset from stream in A + dec ah ; set offset bits 15-8 to 1 + test dl,020H ; test bit Z (offset bit 8) + jne .get_match_length + dec ah ; clear bit 8 if Z bit is clear + jmp short .get_match_length + +.rep_match_or_large_offset: + cmp dl,0c0H ; check if this is a 13-bit offset or a 16-bit offset/rep match (Y bit) + jnb .rep_match_or_16_bit + + ; 13 bit offset + lodsb ; load match offset bits 0-7 + + mov ah,020H ; shift Z (offset bit 12) in place + and ah,dl + shr ah,1 + call .get_nibble ; get nibble for offset bits 8-11 + or ah,cl ; merge nibble + or ah,0E0H ; set offset bits 15-13 to 1 + sub ah,2 ; substract 512 + jmp short .get_match_length + +.rep_match_or_16_bit: + test dl,020H ; test bit Z (offset bit 8) + jne .repeat_match ; rep-match + + ; 16 bit offset + lodsw ; Get 2-byte match offset + +.get_match_length: + mov bp,ax ; bp: offset +.repeat_match: + mov ax,dx ; ax: original token + and al,07H ; isolate match length in token (MMM) + add al,2 ; add MIN_MATCH_SIZE_V2 + + cmp al,09H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2? + jne .got_matchlen ; no, we have the full match length from the token, go copy + + call .get_nibble ; get extra literals length nibble + add al,cl ; add len from token to nibble + cmp al,018H ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15? + jne .got_matchlen ; no, we have the full match length from the token, go copy + + lodsb ; grab extra length byte + test al,al ; zero? 
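+ ; as with the literals above: a non-zero extra byte is the complete
+ ; match length on its own, a zero byte means a 16-bit length follows,
+ ; and a zero 16-bit length marks the end of the compressed data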
+ jne .got_matchlen ; if not, we have the entire length + + lodsw ; grab 16-bit length + test ax,ax ; bail if we hit EOD + je short .done_decompressing + +.got_matchlen: + xchg cx,ax ; copy match length into cx + push ds ; save ds:si (current pointer to compressed data) + xchg si,ax + push es + pop ds + mov si,di ; ds:si now points at back reference in output data + add si,bp + rep movsb ; copy match + xchg si,ax ; restore ds:si + pop ds + jmp .decode_token ; go decode another token + +.done_decompressing: + pop ax ; retrieve the original decompression offset + xchg ax,di ; compute decompressed size + sub ax,di + ret ; done + +.get_nibble: + dec bh ; nibble ready? + jns .has_nibble + + mov cx,ax + lodsb ; load two nibbles + mov bl,al + mov bh,1 + mov ax,cx + +.has_nibble: + mov cl,4 ; swap 4 high and low bits of nibble + ror bl,cl + mov cl,0FH + and cl,bl + ret diff --git a/src/expand.c b/src/expand_v1.c old mode 100755 new mode 100644 similarity index 72% rename from src/expand.c rename to src/expand_v1.c index eed5326..c19fb08 --- a/src/expand.c +++ b/src/expand_v1.c @@ -1,5 +1,5 @@ /* - * expand.c - block decompressor implementation + * expand_v1.c - LZSA1 block decompressor implementation * * Copyright (C) 2019 Emmanuel Marty * @@ -20,11 +20,21 @@ * 3. This notice may not be removed or altered from any source distribution. */ +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + #include #include #include #include "format.h" -#include "expand.h" +#include "expand_v1.h" #ifdef _MSC_VER #define FORCE_INLINE __forceinline @@ -32,11 +42,11 @@ #define FORCE_INLINE __attribute__((always_inline)) #endif /* _MSC_VER */ -static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { +static inline FORCE_INLINE int lzsa_expand_literals_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { const unsigned char *pInBlock = *ppInBlock; unsigned char *pCurOutData = *ppCurOutData; - if (nLiterals == LITERALS_RUN_LEN) { + if (nLiterals == LITERALS_RUN_LEN_V1) { unsigned char nByte; if (pInBlock < pInBlockEnd) { @@ -83,12 +93,12 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p return 0; } -static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { +static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { const unsigned char *pInBlock = *ppInBlock; unsigned char *pCurOutData = *ppCurOutData; - nMatchLen += MIN_MATCH_SIZE; - if (nMatchLen == (MATCH_RUN_LEN + MIN_MATCH_SIZE)) { + nMatchLen += MIN_MATCH_SIZE_V1; + if (nMatchLen == 
(MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) { unsigned char nByte; if (pInBlock < pInBlockEnd) { @@ -159,7 +169,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn } /** - * Decompress one data block + * Decompress one LZSA1 data block * * @param pInBlock pointer to compressed data * @param nInBlockSize size of compressed data, in bytes @@ -169,7 +179,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { +int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { const unsigned char *pInBlockEnd = pInBlock + nBlockSize; const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; unsigned char *pCurOutData = pOutData + nOutDataOffset; @@ -182,36 +192,35 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch const unsigned char token = *pInBlock++; unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); - if (nLiterals < LITERALS_RUN_LEN) { + if (nLiterals < LITERALS_RUN_LEN_V1) { memcpy(pCurOutData, pInBlock, 8); pInBlock += nLiterals; pCurOutData += nLiterals; } else { - if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) + if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) return -1; } if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ int nMatchOffset; - nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff)); + nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00; if (token & 0x80) { - nMatchOffset |= (((unsigned int)(*pInBlock++ ^ 0xff)) << 8); + nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8); } - nMatchOffset++; - const unsigned char *pSrc = pCurOutData - nMatchOffset; + const unsigned char *pSrc = pCurOutData + nMatchOffset; if (pSrc >= pOutData) { unsigned int nMatchLen = (unsigned int)(token & 0x0f); - if (nMatchLen < MATCH_RUN_LEN && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + if (nMatchLen < MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { memcpy(pCurOutData, pSrc, 8); memcpy(pCurOutData + 8, pSrc + 8, 8); memcpy(pCurOutData + 16, pSrc + 16, 4); - pCurOutData += (MIN_MATCH_SIZE + nMatchLen); + pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen); } else { - if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) + if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) return -1; } } @@ -227,22 +236,21 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch const unsigned char token = *pInBlock++; unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); - if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) + if (lzsa_expand_literals_slow_v1(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd)) return -1; if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ int nMatchOffset; - nMatchOffset = ((unsigned int)(*pInBlock++ ^ 0xff)); + nMatchOffset = ((unsigned int)(*pInBlock++)) | 0xffffff00; if (token & 0x80) { - nMatchOffset |= (((unsigned 
int)(*pInBlock++ ^ 0xff)) << 8); + nMatchOffset = (nMatchOffset & 0xffff00ff) | (((unsigned int)(*pInBlock++)) << 8); } - nMatchOffset++; - const unsigned char *pSrc = pCurOutData - nMatchOffset; + const unsigned char *pSrc = pCurOutData + nMatchOffset; if (pSrc >= pOutData) { unsigned int nMatchLen = (unsigned int)(token & 0x0f); - if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) + if (lzsa_expand_match_slow_v1(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) return -1; } else { diff --git a/src/expand.h b/src/expand_v1.h old mode 100755 new mode 100644 similarity index 65% rename from src/expand.h rename to src/expand_v1.h index 06cbcf9..d6f44b9 --- a/src/expand.h +++ b/src/expand_v1.h @@ -1,5 +1,5 @@ /* - * expand.h - block decompressor definitions + * expand_v1.h - LZSA1 block decompressor definitions * * Copyright (C) 2019 Emmanuel Marty * @@ -20,11 +20,21 @@ * 3. This notice may not be removed or altered from any source distribution. */ -#ifndef _EXPAND_H -#define _EXPAND_H +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_V1_H +#define _EXPAND_V1_H /** - * Decompress one data block + * Decompress one LZSA1 data block * * @param pInBlock pointer to compressed data * @param nInBlockSize size of compressed data, in bytes @@ -34,6 +44,6 @@ * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); +int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); -#endif /* _EXPAND_H */ +#endif /* _EXPAND_V1_H */ diff --git a/src/expand_v2.c b/src/expand_v2.c new file mode 100644 index 0000000..2c993e5 --- /dev/null +++ b/src/expand_v2.c @@ -0,0 +1,330 @@ +/* + * expand_v2.c - LZSA2 block decompressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. 
https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "format.h" +#include "expand_v2.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else /* _MSC_VER */ +#define FORCE_INLINE __attribute__((always_inline)) +#endif /* _MSC_VER */ + +static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles) { + unsigned int nValue; + + if ((*nCurNibbles ^= 1) != 0) { + const unsigned char *pInBlock = *ppInBlock; + if (pInBlock >= pInBlockEnd) return -1; + (*nibbles) = *pInBlock++; + *ppInBlock = pInBlock; + } + + nValue = ((unsigned int)((*nibbles) & 0xf0)) >> 4; + + (*nibbles) <<= 4; + + return nValue; +} + +static inline FORCE_INLINE int lzsa_expand_literals_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int nLiterals, int *nCurNibbles, unsigned char *nibbles, + unsigned char **ppCurOutData, const unsigned char *pOutDataEnd) { + const unsigned char *pInBlock = *ppInBlock; + unsigned char *pCurOutData = *ppCurOutData; + + if (nLiterals == LITERALS_RUN_LEN_V2) { + nLiterals += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles); + + if (nLiterals == (LITERALS_RUN_LEN_V2 + 15)) { + if (pInBlock < pInBlockEnd) { + nLiterals = ((unsigned int)*pInBlock++); + + if (nLiterals == 0) { + if ((pInBlock + 1) < pInBlockEnd) { + nLiterals = ((unsigned int)*pInBlock++); + nLiterals |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + } + } + } + else { + return -1; + } + } + } + + if (nLiterals != 0) { + if ((pInBlock + nLiterals) <= pInBlockEnd && + (pCurOutData + nLiterals) <= pOutDataEnd) { + memcpy(pCurOutData, pInBlock, nLiterals); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + return -1; + } + } + + *ppInBlock = pInBlock; + *ppCurOutData = pCurOutData; + return 0; +} + +static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, const unsigned char *pSrc, unsigned int nMatchLen, int *nCurNibbles, unsigned char *nibbles, + unsigned char **ppCurOutData, const unsigned char *pOutDataEnd, const unsigned char *pOutDataFastEnd) { + const unsigned char *pInBlock = *ppInBlock; + unsigned char *pCurOutData = *ppCurOutData; + + nMatchLen += MIN_MATCH_SIZE_V2; + if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) { + nMatchLen += lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, nCurNibbles, nibbles); + + if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 + 15)) { + if (pInBlock < pInBlockEnd) { + nMatchLen = ((unsigned int)*pInBlock++); + + if (nMatchLen == 0) { + if ((pInBlock + 1) < pInBlockEnd) { + nMatchLen = ((unsigned int)*pInBlock++); + nMatchLen |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + } + } + } + else { + return -1; + } + } + } + + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if ((pCurOutData - pSrc) >= 8 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + memcpy(pCopyDst, pCopySrc, 8); + memcpy(pCopyDst + 8, pCopySrc + 8, 8); + pCopySrc += 16; + pCopyDst += 16; + } while (pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + 
while (nMatchLen >= 4) { + *pCurOutData++ = *pSrc++; + *pCurOutData++ = *pSrc++; + *pCurOutData++ = *pSrc++; + *pCurOutData++ = *pSrc++; + nMatchLen -= 4; + } + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + + *ppInBlock = pInBlock; + *ppCurOutData = pCurOutData; + return 0; +} + +/** + * Decompress one LZSA2 data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { + const unsigned char *pInBlockEnd = pInBlock + nBlockSize; + const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; + unsigned char *pCurOutData = pOutData + nOutDataOffset; + const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; + const unsigned char *pOutDataFastEnd = pOutDataEnd - 20; + int nCurNibbles = 0; + unsigned char nibbles; + int nMatchOffset = 0; + + /* Fast loop */ + + while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) { + const unsigned char token = *pInBlock++; + unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3); + + if (nLiterals < LITERALS_RUN_LEN_V2) { + memcpy(pCurOutData, pInBlock, 8); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd)) + return -1; + } + + if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ + unsigned char nOffsetMode = token & 0xc0; + + switch (nOffsetMode) { + case 0x00: + /* 5 bit offset */ + nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles); + nMatchOffset |= ((token & 0x20) >> 1); + nMatchOffset |= 0xffffffe0; + break; + + case 0x40: + /* 9 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); + nMatchOffset |= 0xfffffe00; + break; + + case 0x80: + /* 13 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 7); + nMatchOffset |= 0xffffe000; + nMatchOffset -= 512; + break; + + default: + /* Check if this is a 16 bit offset or a rep-match */ + if ((token & 0x20) == 0) { + /* 16 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8); + nMatchOffset |= 0xffff0000; + } + break; + } + + const unsigned char *pSrc = pCurOutData + nMatchOffset; + if (pSrc >= pOutData) { + unsigned int nMatchLen = (unsigned int)(token & 0x07); + if (nMatchLen < MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 4); + pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen); + } + else { + if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, 
pOutDataFastEnd)) + return -1; + } + } + else { + return -1; + } + } + } + + /* Slow loop for the remainder of the buffer */ + + while (pInBlock < pInBlockEnd) { + const unsigned char token = *pInBlock++; + unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3); + + if (lzsa_expand_literals_slow_v2(&pInBlock, pInBlockEnd, nLiterals, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd)) + return -1; + + if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ + unsigned char nOffsetMode = token & 0xc0; + + switch (nOffsetMode) { + case 0x00: + /* 5 bit offset */ + nMatchOffset = (unsigned int)lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles); + nMatchOffset |= ((token & 0x20) >> 1); + nMatchOffset |= 0xffffffe0; + break; + + case 0x40: + /* 9 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); + nMatchOffset |= 0xfffffe00; + break; + + case 0x80: + /* 13 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles) << 8); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 7); + nMatchOffset |= 0xffffe000; + nMatchOffset -= 512; + break; + + default: + /* Check if this is a 16 bit offset or a rep-match */ + if ((token & 0x20) == 0) { + /* 16 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8); + nMatchOffset |= 0xffff0000; + } + break; + } + + const unsigned char *pSrc = pCurOutData + nMatchOffset; + if (pSrc >= pOutData) { + unsigned int nMatchLen = (unsigned int)(token & 0x07); + if (lzsa_expand_match_slow_v2(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &nCurNibbles, &nibbles, &pCurOutData, pOutDataEnd, pOutDataFastEnd)) + return -1; + } + else { + return -1; + } + } + } + + return (int)(pCurOutData - (pOutData + nOutDataOffset)); +} diff --git a/src/expand_v2.h b/src/expand_v2.h new file mode 100644 index 0000000..906965c --- /dev/null +++ b/src/expand_v2.h @@ -0,0 +1,49 @@ +/* + * expand_v2.h - LZSA2 block decompressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_V2_H +#define _EXPAND_V2_H + +/** + * Decompress one LZSA2 data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); + +#endif /* _EXPAND_V2_H */ diff --git a/src/format.h b/src/format.h index 899b7b4..c9534fa 100755 --- a/src/format.h +++ b/src/format.h @@ -20,13 +20,28 @@ * 3. This notice may not be removed or altered from any source distribution. */ +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + #ifndef _FORMAT_H #define _FORMAT_H -#define MIN_MATCH_SIZE 3 #define MIN_OFFSET 1 #define MAX_OFFSET 0xffff -#define LITERALS_RUN_LEN 7 -#define MATCH_RUN_LEN 15 + +#define MIN_MATCH_SIZE_V1 3 +#define LITERALS_RUN_LEN_V1 7 +#define MATCH_RUN_LEN_V1 15 + +#define MIN_MATCH_SIZE_V2 2 +#define LITERALS_RUN_LEN_V2 3 +#define MATCH_RUN_LEN_V2 7 #endif /* _FORMAT_H */ diff --git a/src/frame.c b/src/frame.c index ba99904..edbe531 100644 --- a/src/frame.c +++ b/src/frame.c @@ -20,9 +20,18 @@ * 3. This notice may not be removed or altered from any source distribution. */ +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + #include #include "frame.h" -#include "shrink.h" #define LZSA_ID_0 0x7b #define LZSA_ID_1 0x9e @@ -53,11 +62,11 @@ int lzsa_get_frame_size(void) { * * @return number of encoded bytes, or -1 for failure */ -int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize) { - if (nMaxFrameDataSize >= 3) { +int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) { + if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) { pFrameData[0] = LZSA_ID_0; /* Magic number */ pFrameData[1] = LZSA_ID_1; - pFrameData[2] = 0; /* Format version 1 */ + pFrameData[2] = (nFormatVersion == 2) ? 
0x20 : 0; /* Format version 1 */ return 3; } @@ -139,14 +148,16 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS * * @return 0 for success, or -1 for failure */ -int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize) { +int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion) { if (nFrameDataSize != 3 || pFrameData[0] != LZSA_ID_0 || pFrameData[1] != LZSA_ID_1 || - pFrameData[2] != 0) { + (pFrameData[2] & 0x1f) != 0 || + ((pFrameData[2] & 0xe0) != 0x00 && (pFrameData[2] & 0xe0) != 0x20)) { return -1; } else { + *nFormatVersion = (pFrameData[2] & 0xe0) ? 2 : 1; return 0; } } diff --git a/src/frame.h b/src/frame.h index e858110..520b065 100644 --- a/src/frame.h +++ b/src/frame.h @@ -20,6 +20,16 @@ * 3. This notice may not be removed or altered from any source distribution. */ +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + #ifndef _FRAME_H #define _FRAME_H @@ -45,7 +55,7 @@ int lzsa_get_frame_size(void); * * @return number of encoded bytes, or -1 for failure */ -int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize); +int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion); /** * Encode compressed block frame header @@ -87,7 +97,7 @@ int lzsa_encode_footer_frame(unsigned char *pFrameData, const int nMaxFrameDataS * * @return 0 for success, or -1 for failure */ -int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize); +int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize, int *nFormatVersion); /** * Decode frame header diff --git a/src/lib.c b/src/lib.c new file mode 100755 index 0000000..5a516b2 --- /dev/null +++ b/src/lib.c @@ -0,0 +1,217 @@ +/* + * lib.c - LZSA library implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "lib.h" +#include "matchfinder.h" +#include "shrink_v1.h" +#include "shrink_v2.h" +#include "expand_v1.h" +#include "expand_v2.h" +#include "format.h" + +/** + * Initialize compression context + * + * @param pCompressor compression context to initialize + * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress) + * @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE) + * @param nFlags compression flags + * + * @return 0 for success, non-zero for failure + */ +int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) { + int nResult; + int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2; + + nResult = divsufsort_init(&pCompressor->divsufsort_context); + pCompressor->intervals = NULL; + pCompressor->pos_data = NULL; + pCompressor->open_intervals = NULL; + pCompressor->match = NULL; + pCompressor->best_match = NULL; + pCompressor->slot_cost = NULL; + pCompressor->repmatch_opt = NULL; + pCompressor->min_match_size = nMinMatchSize; + if (pCompressor->min_match_size < nMinMatchSizeForFormat) + pCompressor->min_match_size = nMinMatchSizeForFormat; + else if (pCompressor->min_match_size > 5) + pCompressor->min_match_size = 5; + pCompressor->format_version = nFormatVersion; + pCompressor->flags = nFlags; + pCompressor->num_commands = 0; + + if (!nResult) { + pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + + if (pCompressor->intervals) { + pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + + if (pCompressor->pos_data) { + pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int)); + + if (pCompressor->open_intervals) { + pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match)); + + if (pCompressor->match) { + if (pCompressor->format_version == 2) { + pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match)); + + if (pCompressor->best_match) { + pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int)); + + if (pCompressor->slot_cost) { + pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt)); + + if (pCompressor->repmatch_opt) + return 0; + } + } + } + else { + return 0; + } + } + } + } + } + } + + lzsa_compressor_destroy(pCompressor); + return 100; +} + +/** + * Clean up compression context and free up any associated resources + * + * @param pCompressor compression context to clean up + */ +void lzsa_compressor_destroy(lsza_compressor *pCompressor) { + divsufsort_destroy(&pCompressor->divsufsort_context); + + if (pCompressor->repmatch_opt) { + free(pCompressor->repmatch_opt); + pCompressor->repmatch_opt = NULL; + } + + if (pCompressor->slot_cost) { + free(pCompressor->slot_cost); + pCompressor->slot_cost = NULL; + } + + if (pCompressor->best_match) { + free(pCompressor->best_match); + pCompressor->best_match = NULL; + } + + if (pCompressor->match) { + free(pCompressor->match); + pCompressor->match = NULL; + } + + if (pCompressor->open_intervals) { + free(pCompressor->open_intervals); + pCompressor->open_intervals = NULL; + } + + if (pCompressor->pos_data) { + free(pCompressor->pos_data); + pCompressor->pos_data = NULL; + } + + if 
(pCompressor->intervals) { + free(pCompressor->intervals); + pCompressor->intervals = NULL; + } +} + +/** + * Compress one block of data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none) + * @param nInDataSize number of input bytes to compress + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { + if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) + return -1; + if (nPreviousBlockSize) { + lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize); + } + lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + + if (pCompressor->format_version == 1) { + return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize); + } + else if (pCompressor->format_version == 2) { + return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize); + } + else { + return -1; + } +} + +/** + * Get the number of compression commands issued in compressed data blocks + * + * @return number of commands + */ +int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) { + return pCompressor->num_commands; +} + +/** + * Decompress one data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { + if (nFormatVersion == 1) + return lzsa_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + else if (nFormatVersion == 2) + return lzsa_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + else + return -1; +} diff --git a/src/shrink.h b/src/lib.h similarity index 57% rename from src/shrink.h rename to src/lib.h index fcf3592..331b7cb 100755 --- a/src/shrink.h +++ b/src/lib.h @@ -1,5 +1,5 @@ /* - * shrink.h - block compressor definitions + * lib.h - LZSA library definitions * * Copyright (C) 2019 Emmanuel Marty * @@ -20,8 +20,18 @@ * 3. This notice may not be removed or altered from any source distribution. */ -#ifndef _SHRINK_H -#define _SHRINK_H +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _LIB_H +#define _LIB_H #include "divsufsort.h" @@ -29,17 +39,46 @@ #define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */ #define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */ -/* Forward declarations */ -typedef struct _lzsa_match lzsa_match; +#define LCP_BITS 15 +#define LCP_MAX (1<<(LCP_BITS - 1)) +#define LCP_SHIFT (32-LCP_BITS) +#define LCP_MASK (((1< + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + #include #include #include @@ -31,17 +41,18 @@ #endif #include "format.h" #include "frame.h" -#include "shrink.h" -#include "expand.h" +#include "lib.h" #define BLOCK_SIZE 65536 #define OPT_VERBOSE 1 #define OPT_RAW 2 #define OPT_FAVOR_RATIO 4 +#define TOOL_VERSION "0.6.0" + /*---------------------------------------------------------------------------*/ -static long long lzsa_get_time() { +static long long do_get_time() { long long nTime; #ifdef _WIN32 @@ -60,7 +71,7 @@ static long long lzsa_get_time() { /*---------------------------------------------------------------------------*/ -static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) { +static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) { FILE *f_in, *f_out; unsigned char *pInData, *pOutData; lsza_compressor compressor; @@ -146,7 +157,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, nFlags |= LZSA_FLAG_FAVOR_RATIO; if (nOptions & OPT_RAW) nFlags |= LZSA_FLAG_RAW_BLOCK; - nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags); + nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags); if (nResult != 0) { free(pOutData); pOutData = NULL; @@ -165,7 +176,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, } if ((nOptions & OPT_RAW) == 0) { - int nHeaderSize = lzsa_encode_header(cFrameData, 16); + int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion); if (nHeaderSize < 0) bError = true; else { @@ -175,7 +186,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, } if (nOptions & OPT_VERBOSE) { - nStartTime = lzsa_get_time(); + nStartTime = do_get_time(); } int nPreviousBlockSize = 0; @@ -280,7 +291,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, nCompressedSize += (long long)nFooterSize; if (!bError && (nOptions & OPT_VERBOSE)) { - nEndTime = lzsa_get_time(); + nEndTime = do_get_time(); double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; @@ -315,7 +326,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, /*---------------------------------------------------------------------------*/ -static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { +static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const 
unsigned int nOptions, int nFormatVersion) { long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL; unsigned int nFileSize = 0; @@ -338,7 +349,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename return 100; } - if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) { + if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { fclose(pInFile); pInFile = NULL; fprintf(stderr, "invalid magic number or format version in input file\n"); @@ -423,7 +434,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename } if (nOptions & OPT_VERBOSE) { - nStartTime = lzsa_get_time(); + nStartTime = do_get_time(); } int nDecompressionError = 0; @@ -476,7 +487,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename else { unsigned int nBlockOffs = 0; - nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); + nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); if (nDecompressedSize < 0) { nDecompressionError = nDecompressedSize; break; @@ -518,7 +529,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename } else { if (nOptions & OPT_VERBOSE) { - nEndTime = lzsa_get_time(); + nEndTime = do_get_time(); double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n", @@ -529,7 +540,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename } } -static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { +static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL; long long nKnownGoodSize = 0LL; @@ -553,7 +564,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c return 100; } - if (lzsa_decode_header(cFrameData, nHeaderSize) < 0) { + if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { fclose(pInFile); pInFile = NULL; fprintf(stderr, "invalid magic number or format version in input file\n"); @@ -659,7 +670,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c } if (nOptions & OPT_VERBOSE) { - nStartTime = lzsa_get_time(); + nStartTime = do_get_time(); } int nDecompressionError = 0; @@ -715,7 +726,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c else { unsigned int nBlockOffs = 0; - nDecompressedSize = lzsa_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); + nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); if (nDecompressedSize < 0) { nDecompressionError = nDecompressedSize; break; @@ -771,7 +782,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c } else { if (nOptions & OPT_VERBOSE) { - nEndTime = lzsa_get_time(); + nEndTime = do_get_time(); double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n", @@ -793,9 +804,11 @@ int main(int argc, char **argv) { bool bCommandDefined = false; bool bVerifyCompression = 
false; bool bMinMatchDefined = false; + bool bFormatVersionDefined = false; char cCommand = 'z'; - int nMinMatchSize = MIN_MATCH_SIZE; + int nMinMatchSize = 0; unsigned int nOptions = OPT_FAVOR_RATIO; + int nFormatVersion = 1; for (i = 1; i < argc; i++) { if (!strcmp(argv[i], "-d")) { @@ -840,7 +853,7 @@ int main(int argc, char **argv) { if (!bMinMatchDefined && (i + 1) < argc) { char *pEnd = NULL; nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10); - if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) { + if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) { i++; bMinMatchDefined = true; nOptions &= (~OPT_FAVOR_RATIO); @@ -856,7 +869,7 @@ int main(int argc, char **argv) { if (!bMinMatchDefined) { char *pEnd = NULL; nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10); - if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) { + if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) { bMinMatchDefined = true; nOptions &= (~OPT_FAVOR_RATIO); } @@ -869,7 +882,7 @@ int main(int argc, char **argv) { } else if (!strcmp(argv[i], "--prefer-ratio")) { if (!bMinMatchDefined) { - nMinMatchSize = MIN_MATCH_SIZE; + nMinMatchSize = 0; bMinMatchDefined = true; } else @@ -884,6 +897,35 @@ int main(int argc, char **argv) { else bArgsError = true; } + else if (!strcmp(argv[i], "-f")) { + if (!bFormatVersionDefined && (i + 1) < argc) { + char *pEnd = NULL; + nFormatVersion = (int)strtol(argv[i + 1], &pEnd, 10); + if (pEnd && pEnd != argv[i + 1] && (nFormatVersion >= 1 && nFormatVersion <= 2)) { + i++; + bFormatVersionDefined = true; + } + else { + bArgsError = true; + } + } + else + bArgsError = true; + } + else if (!strncmp(argv[i], "-f", 2)) { + if (!bFormatVersionDefined) { + char *pEnd = NULL; + nFormatVersion = (int)strtol(argv[i] + 2, &pEnd, 10); + if (pEnd && pEnd != (argv[i] + 2) && (nFormatVersion >= 1 && nFormatVersion <= 2)) { + bFormatVersionDefined = true; + } + else { + bArgsError = true; + } + } + else + bArgsError = true; + } else if (!strcmp(argv[i], "-v")) { if ((nOptions & OPT_VERBOSE) == 0) { nOptions |= OPT_VERBOSE; @@ -911,26 +953,28 @@ int main(int argc, char **argv) { } if (bArgsError || !pszInFilename || !pszOutFilename) { + fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n"); fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] \n", argv[0]); fprintf(stderr, " -c: check resulting stream after compressing\n"); fprintf(stderr, " -d: decompress (default: compress)\n"); fprintf(stderr, " -v: be verbose\n"); + fprintf(stderr, " -f : LZSA compression format (1-2)\n"); fprintf(stderr, " -r: raw block format (max. 
64 Kb files)\n"); fprintf(stderr, " -D : use dictionary file\n"); - fprintf(stderr, " -m : minimum match size (3-14) (default: 3)\n"); + fprintf(stderr, " -m : minimum match size (3-5) (default: 3)\n"); fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n"); fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n"); return 100; } if (cCommand == 'z') { - int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize); + int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion); if (nResult == 0 && bVerifyCompression) { - nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions); + nResult = do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion); } } else if (cCommand == 'd') { - return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions); + return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); } else { return 100; diff --git a/src/matchfinder.c b/src/matchfinder.c new file mode 100644 index 0000000..c421eb6 --- /dev/null +++ b/src/matchfinder.c @@ -0,0 +1,294 @@ +/* + * matchfinder.c - LZ match finder implementation + * + * The following copying information applies to this specific source code file: + * + * Written in 2019 by Emmanuel Marty + * Portions written in 2014-2015 by Eric Biggers + * + * To the extent possible under law, the author(s) have dedicated all copyright + * and related and neighboring rights to this software to the public domain + * worldwide via the Creative Commons Zero 1.0 Universal Public Domain + * Dedication (the "CC0"). + * + * This software is distributed in the hope that it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + * FOR A PARTICULAR PURPOSE. See the CC0 for more details. + * + * You should have received a copy of the CC0 along with this software; if not + * see . + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "matchfinder.h" +#include "format.h" +#include "lib.h" + +/** + * Parse input data, build suffix array and overlaid data structures to speed up match finding + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress) + * + * @return 0 for success, non-zero for failure + */ +int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) { + unsigned int *intervals = pCompressor->intervals; + + /* Build suffix array from input data */ + if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) { + return 100; + } + + int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */ + int *Phi = PLCP; + int nCurLen = 0; + int i; + + /* Compute the permuted LCP first (Kärkkäinen method) */ + Phi[intervals[0]] = -1; + for (i = 1; i < nInWindowSize; i++) + Phi[intervals[i]] = intervals[i - 1]; + for (i = 0; i < nInWindowSize; i++) { + if (Phi[i] == -1) { + PLCP[i] = 0; + continue; + } + int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]); + while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++; + PLCP[i] = nCurLen; + if (nCurLen > 0) + nCurLen--; + } + + /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also + * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method, + * and the interval builder below doesn't need it either. 
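
For readers less familiar with the permuted-LCP construction referenced in the comment above, the following standalone sketch (not part of the patch) walks through the same Phi/PLCP steps on a tiny string. A naive qsort of suffixes stands in for libdivsufsort, and the sample text is purely illustrative.

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    static const char *g_text;   /* text being indexed (global only so qsort can see it) */

    static int cmp_suffix(const void *a, const void *b) {
        return strcmp(g_text + *(const int *)a, g_text + *(const int *)b);
    }

    int main(void) {
        const char *text = "banana";
        int n = (int)strlen(text), i, len = 0;
        int SA[16], Phi[16], PLCP[16], LCP[16];

        /* Naive suffix array: sort suffix start positions lexicographically */
        g_text = text;
        for (i = 0; i < n; i++) SA[i] = i;
        qsort(SA, n, sizeof(int), cmp_suffix);

        /* Phi[p] = text position of the suffix ranked just before suffix p */
        Phi[SA[0]] = -1;
        for (i = 1; i < n; i++) Phi[SA[i]] = SA[i - 1];

        /* Permuted LCP: scanning text positions left to right, the shared prefix
         * length can shrink by at most one per step, so the running counter is reused */
        for (i = 0; i < n; i++) {
            if (Phi[i] == -1) { PLCP[i] = 0; continue; }
            while (text[i + len] && text[i + len] == text[Phi[i] + len]) len++;
            PLCP[i] = len;
            if (len > 0) len--;
        }

        /* Rotate PLCP back into suffix-array order to get the conventional LCP array */
        for (i = 0; i < n; i++) LCP[i] = PLCP[SA[i]];

        for (i = 0; i < n; i++)
            printf("rank %d: pos %d, lcp %d, suffix \"%s\"\n", i, SA[i], LCP[i], text + SA[i]);
        return 0;
    }

The linear behaviour comes from the fact that PLCP[i] can never be smaller than PLCP[i - 1] - 1, which is exactly what the nCurLen carry in lzsa_build_suffix_array exploits.
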
*/ + intervals[0] &= POS_MASK; + int nMinMatchSize = pCompressor->min_match_size; + for (i = 1; i < nInWindowSize - 1; i++) { + int nIndex = (int)(intervals[i] & POS_MASK); + int nLen = PLCP[nIndex]; + if (nLen < nMinMatchSize) + nLen = 0; + if (nLen > LCP_MAX) + nLen = LCP_MAX; + intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT); + } + if (i < nInWindowSize) + intervals[i] &= POS_MASK; + + /** + * Build intervals for finding matches + * + * Methodology and code fragment taken from wimlib (CC0 license): + * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD + */ + unsigned int * const SA_and_LCP = intervals; + unsigned int *pos_data = pCompressor->pos_data; + unsigned int next_interval_idx; + unsigned int *top = pCompressor->open_intervals; + unsigned int prev_pos = SA_and_LCP[0] & POS_MASK; + + *top = 0; + intervals[0] = 0; + next_interval_idx = 1; + + for (int r = 1; r < nInWindowSize; r++) { + const unsigned int next_pos = SA_and_LCP[r] & POS_MASK; + const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK; + const unsigned int top_lcp = *top & LCP_MASK; + + if (next_lcp == top_lcp) { + /* Continuing the deepest open interval */ + pos_data[prev_pos] = *top; + } + else if (next_lcp > top_lcp) { + /* Opening a new interval */ + *++top = next_lcp | next_interval_idx++; + pos_data[prev_pos] = *top; + } + else { + /* Closing the deepest open interval */ + pos_data[prev_pos] = *top; + for (;;) { + const unsigned int closed_interval_idx = *top-- & POS_MASK; + const unsigned int superinterval_lcp = *top & LCP_MASK; + + if (next_lcp == superinterval_lcp) { + /* Continuing the superinterval */ + intervals[closed_interval_idx] = *top; + break; + } + else if (next_lcp > superinterval_lcp) { + /* Creating a new interval that is a + * superinterval of the one being + * closed, but still a subinterval of + * its superinterval */ + *++top = next_lcp | next_interval_idx++; + intervals[closed_interval_idx] = *top; + break; + } + else { + /* Also closing the superinterval */ + intervals[closed_interval_idx] = *top; + } + } + } + prev_pos = next_pos; + } + + /* Close any still-open intervals. */ + pos_data[prev_pos] = *top; + for (; top > pCompressor->open_intervals; top--) + intervals[*top & POS_MASK] = *(top - 1); + + /* Success */ + return 0; +} + +/** + * Find matches at the specified offset in the input window + * + * @param pCompressor compression context + * @param nOffset offset to find matches at, in the input window + * @param pMatches pointer to returned matches + * @param nMaxMatches maximum number of matches to return (0 for none) + * + * @return number of matches + */ +int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) { + unsigned int *intervals = pCompressor->intervals; + unsigned int *pos_data = pCompressor->pos_data; + unsigned int ref; + unsigned int super_ref; + unsigned int match_pos; + lzsa_match *matchptr; + + /** + * Find matches using intervals + * + * Taken from wimlib (CC0 license): + * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD + */ + + /* Get the deepest lcp-interval containing the current suffix. */ + ref = pos_data[nOffset]; + + pos_data[nOffset] = 0; + + /* Ascend until we reach a visited interval, the root, or a child of the + * root. Link unvisited intervals to the current suffix as we go. 
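
The (length, offset) pairs that this interval walk reports are the kind a brute-force scan would find. The toy program below computes them the slow way, purely to make the output of lzsa_find_matches_at concrete; the buffer contents and the minimum length of 3 (LZSA1's default minimum match size) are illustrative.

    #include <stdio.h>
    #include <string.h>

    int main(void) {
        const char *buf = "abcabcabx";
        int n = (int)strlen(buf), pos;

        for (pos = 1; pos < n; pos++) {
            int best_len = 0, best_off = 0, prev;

            /* Brute force: compare the suffix at pos against every earlier position */
            for (prev = 0; prev < pos; prev++) {
                int len = 0;
                while (pos + len < n && buf[prev + len] == buf[pos + len]) len++;
                if (len > best_len) { best_len = len; best_off = pos - prev; }
            }
            if (best_len >= 3)
                printf("pos %d: longest match has length %d at offset %d\n",
                       pos, best_len, best_off);
        }
        return 0;
    }

The interval machinery finds such candidates in roughly linear time overall, and additionally caps lengths at LCP_MAX, rejects offsets above MAX_OFFSET, and returns up to nMaxMatches candidates per position rather than only the longest one.
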
*/ + while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) { + intervals[ref & POS_MASK] = nOffset; + ref = super_ref; + } + + if (super_ref == 0) { + /* In this case, the current interval may be any of: + * (1) the root; + * (2) an unvisited child of the root; + * (3) an interval last visited by suffix 0 + * + * We could avoid the ambiguity with (3) by using an lcp + * placeholder value other than 0 to represent "visited", but + * it's fastest to use 0. So we just don't allow matches with + * position 0. */ + + if (ref != 0) /* Not the root? */ + intervals[ref & POS_MASK] = nOffset; + return 0; + } + + /* Ascend indirectly via pos_data[] links. */ + match_pos = super_ref; + matchptr = pMatches; + for (;;) { + while ((super_ref = pos_data[match_pos]) > ref) + match_pos = intervals[super_ref & POS_MASK]; + intervals[ref & POS_MASK] = nOffset; + pos_data[match_pos] = ref; + + if ((matchptr - pMatches) < nMaxMatches) { + int nMatchOffset = (int)(nOffset - match_pos); + + if (nMatchOffset <= MAX_OFFSET) { + matchptr->length = (unsigned short)(ref >> LCP_SHIFT); + matchptr->offset = (unsigned short)nMatchOffset; + matchptr++; + } + } + + if (super_ref == 0) + break; + ref = super_ref; + match_pos = intervals[ref & POS_MASK]; + } + + return (int)(matchptr - pMatches); +} + +/** + * Skip previously compressed bytes + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically 0) + * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) + */ +void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + lzsa_match match; + int i; + + /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However, + * we don't store the matches. */ + for (i = nStartOffset; i < nEndOffset; i++) { + lzsa_find_matches_at(pCompressor, i, &match, 0); + } +} + +/** + * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for + * the optimizer to look at. + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + */ +void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT); + int i; + + for (i = nStartOffset; i < nEndOffset; i++) { + int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET); + int m; + + for (m = 0; m < NMATCHES_PER_OFFSET; m++) { + if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) { + pMatch->length = 0; + pMatch->offset = 0; + } + else { + int nMaxLen = (nEndOffset - LAST_LITERALS) - i; + if (nMaxLen < 0) + nMaxLen = 0; + if (pMatch->length > nMaxLen) + pMatch->length = (unsigned short)nMaxLen; + } + + pMatch++; + } + } +} diff --git a/src/matchfinder.h b/src/matchfinder.h new file mode 100644 index 0000000..27bcc34 --- /dev/null +++ b/src/matchfinder.h @@ -0,0 +1,82 @@ +/* + * matchfinder.h - LZ match finder definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _MATCHFINDER_H +#define _MATCHFINDER_H + +/* Forward declarations */ +typedef struct _lzsa_match lzsa_match; +typedef struct _lsza_compressor lsza_compressor; + +/** + * Parse input data, build suffix array and overlaid data structures to speed up match finding + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress) + * + * @return 0 for success, non-zero for failure + */ +int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize); + +/** + * Find matches at the specified offset in the input window + * + * @param pCompressor compression context + * @param nOffset offset to find matches at, in the input window + * @param pMatches pointer to returned matches + * @param nMaxMatches maximum number of matches to return (0 for none) + * + * @return number of matches + */ +int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches); + +/** + * Skip previously compressed bytes + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically 0) + * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) + */ +void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset); + +/** + * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for + * the optimizer to look at. 
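
Taken together, these entry points are driven in a fixed order by the block compressor. The sketch below mirrors that flow (compare lzsa_shrink_block in the removed shrink.c further down); it is not a drop-in function: the compressor is assumed to have been initialized elsewhere, and the LZSA1 emitter is shown only as an example of the version-specific stage that follows.

    #include "matchfinder.h"
    #include "shrink_v1.h"

    /* Sketch: index, scan and emit one block, assuming an already-initialized compressor */
    static int shrink_one_block(lsza_compressor *pCompressor, const unsigned char *pInWindow,
                                int nPreviousBlockSize, int nInDataSize,
                                unsigned char *pOutData, int nMaxOutDataSize) {
        /* 1. Build the suffix array over previous block + new data in one pass */
        if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
            return -1;

        /* 2. Visit, but do not record, matches inside the already-emitted previous block */
        if (nPreviousBlockSize)
            lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);

        /* 3. Record up to NMATCHES_PER_OFFSET candidates per position for the optimizer */
        lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);

        /* 4. Version-specific optimize-and-emit stage (LZSA1 shown as an example) */
        return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize,
                                                nInDataSize, pOutData, nMaxOutDataSize);
    }
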
+ * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + */ +void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset); + +#endif /* _MATCHFINDER_H */ diff --git a/src/shrink.c b/src/shrink.c deleted file mode 100755 index cd6fa9c..0000000 --- a/src/shrink.c +++ /dev/null @@ -1,830 +0,0 @@ -/* - * shrink.c - block compressor implementation - * - * The following copying information applies to this specific source code file: - * - * Written in 2019 by Emmanuel Marty - * With help, ideas, optimizations and speed measurements by spke - * Portions written in 2014-2015 by Eric Biggers - * - * To the extent possible under law, the author(s) have dedicated all copyright - * and related and neighboring rights to this software to the public domain - * worldwide via the Creative Commons Zero 1.0 Universal Public Domain - * Dedication (the "CC0"). - * - * This software is distributed in the hope that it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - * FOR A PARTICULAR PURPOSE. See the CC0 for more details. - * - * You should have received a copy of the CC0 along with this software; if not - * see . - */ - -/* - * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori - * - * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 - * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard - * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ - * - */ - -#include -#include -#include -#include "shrink.h" -#include "format.h" - -#define LCP_BITS 15 -#define LCP_MAX (1<<(LCP_BITS - 1)) -#define LCP_SHIFT (32-LCP_BITS) -#define LCP_MASK (((1<divsufsort_context); - pCompressor->intervals = NULL; - pCompressor->pos_data = NULL; - pCompressor->open_intervals = NULL; - pCompressor->match = NULL; - pCompressor->min_match_size = nMinMatchSize; - if (pCompressor->min_match_size < MIN_MATCH_SIZE) - pCompressor->min_match_size = MIN_MATCH_SIZE; - else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1)) - pCompressor->min_match_size = MATCH_RUN_LEN - 1; - pCompressor->flags = nFlags; - pCompressor->num_commands = 0; - - if (!nResult) { - pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); - - if (pCompressor->intervals) { - pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); - - if (pCompressor->pos_data) { - pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int)); - - if (pCompressor->open_intervals) { - pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match)); - - if (pCompressor->match) - return 0; - } - } - } - } - - lzsa_compressor_destroy(pCompressor); - return 100; -} - -/** - * Clean up compression context and free up any associated resources - * - * @param pCompressor compression context to clean up - */ -void lzsa_compressor_destroy(lsza_compressor *pCompressor) { - divsufsort_destroy(&pCompressor->divsufsort_context); - - if (pCompressor->match) { - free(pCompressor->match); - pCompressor->match = NULL; - } - - if (pCompressor->open_intervals) { - free(pCompressor->open_intervals); - pCompressor->open_intervals = NULL; - } - - if 
(pCompressor->pos_data) { - free(pCompressor->pos_data); - pCompressor->pos_data = NULL; - } - - if (pCompressor->intervals) { - free(pCompressor->intervals); - pCompressor->intervals = NULL; - } -} - -/** - * Parse input data, build suffix array and overlaid data structures to speed up match finding - * - * @param pCompressor compression context - * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) - * @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress) - * - * @return 0 for success, non-zero for failure - */ -static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) { - unsigned int *intervals = pCompressor->intervals; - - /* Build suffix array from input data */ - if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) { - return 100; - } - - int *PLCP = (int*)pCompressor->pos_data; /* Use temporarily */ - int *Phi = PLCP; - int nCurLen = 0; - int i; - - /* Compute the permuted LCP first (Kärkkäinen method) */ - Phi[intervals[0]] = -1; - for (i = 1; i < nInWindowSize; i++) - Phi[intervals[i]] = intervals[i - 1]; - for (i = 0; i < nInWindowSize; i++) { - if (Phi[i] == -1) { - PLCP[i] = 0; - continue; - } - int nMaxLen = (i > Phi[i]) ? (nInWindowSize - i) : (nInWindowSize - Phi[i]); - while (nCurLen < nMaxLen && pInWindow[i + nCurLen] == pInWindow[Phi[i] + nCurLen]) nCurLen++; - PLCP[i] = nCurLen; - if (nCurLen > 0) - nCurLen--; - } - - /* Rotate permuted LCP into the LCP. This has better cache locality than the direct Kasai LCP method. This also - * saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method, - * and the interval builder below doesn't need it either. 
*/ - intervals[0] &= POS_MASK; - int nMinMatchSize = pCompressor->min_match_size; - for (i = 1; i < nInWindowSize - 1; i++) { - int nIndex = (int)(intervals[i] & POS_MASK); - int nLen = PLCP[nIndex]; - if (nLen < nMinMatchSize) - nLen = 0; - if (nLen > LCP_MAX) - nLen = LCP_MAX; - intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nLen) << LCP_SHIFT); - } - if (i < nInWindowSize) - intervals[i] &= POS_MASK; - - /** - * Build intervals for finding matches - * - * Methodology and code fragment taken from wimlib (CC0 license): - * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD - */ - unsigned int * const SA_and_LCP = intervals; - unsigned int *pos_data = pCompressor->pos_data; - unsigned int next_interval_idx; - unsigned int *top = pCompressor->open_intervals; - unsigned int prev_pos = SA_and_LCP[0] & POS_MASK; - - *top = 0; - intervals[0] = 0; - next_interval_idx = 1; - - for (int r = 1; r < nInWindowSize; r++) { - const unsigned int next_pos = SA_and_LCP[r] & POS_MASK; - const unsigned int next_lcp = SA_and_LCP[r] & LCP_MASK; - const unsigned int top_lcp = *top & LCP_MASK; - - if (next_lcp == top_lcp) { - /* Continuing the deepest open interval */ - pos_data[prev_pos] = *top; - } - else if (next_lcp > top_lcp) { - /* Opening a new interval */ - *++top = next_lcp | next_interval_idx++; - pos_data[prev_pos] = *top; - } - else { - /* Closing the deepest open interval */ - pos_data[prev_pos] = *top; - for (;;) { - const unsigned int closed_interval_idx = *top-- & POS_MASK; - const unsigned int superinterval_lcp = *top & LCP_MASK; - - if (next_lcp == superinterval_lcp) { - /* Continuing the superinterval */ - intervals[closed_interval_idx] = *top; - break; - } - else if (next_lcp > superinterval_lcp) { - /* Creating a new interval that is a - * superinterval of the one being - * closed, but still a subinterval of - * its superinterval */ - *++top = next_lcp | next_interval_idx++; - intervals[closed_interval_idx] = *top; - break; - } - else { - /* Also closing the superinterval */ - intervals[closed_interval_idx] = *top; - } - } - } - prev_pos = next_pos; - } - - /* Close any still-open intervals. */ - pos_data[prev_pos] = *top; - for (; top > pCompressor->open_intervals; top--) - intervals[*top & POS_MASK] = *(top - 1); - - /* Success */ - return 0; -} - -/** - * Find matches at the specified offset in the input window - * - * @param pCompressor compression context - * @param nOffset offset to find matches at, in the input window - * @param pMatches pointer to returned matches - * @param nMaxMatches maximum number of matches to return (0 for none) - * - * @return number of matches - */ -static int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) { - unsigned int *intervals = pCompressor->intervals; - unsigned int *pos_data = pCompressor->pos_data; - unsigned int ref; - unsigned int super_ref; - unsigned int match_pos; - lzsa_match *matchptr; - - /** - * Find matches using intervals - * - * Taken from wimlib (CC0 license): - * https://wimlib.net/git/?p=wimlib;a=blob_plain;f=src/lcpit_matchfinder.c;h=a2d6a1e0cd95200d1f3a5464d8359d5736b14cbe;hb=HEAD - */ - - /* Get the deepest lcp-interval containing the current suffix. */ - ref = pos_data[nOffset]; - - pos_data[nOffset] = 0; - - /* Ascend until we reach a visited interval, the root, or a child of the - * root. Link unvisited intervals to the current suffix as we go. 
*/ - while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) { - intervals[ref & POS_MASK] = nOffset; - ref = super_ref; - } - - if (super_ref == 0) { - /* In this case, the current interval may be any of: - * (1) the root; - * (2) an unvisited child of the root; - * (3) an interval last visited by suffix 0 - * - * We could avoid the ambiguity with (3) by using an lcp - * placeholder value other than 0 to represent "visited", but - * it's fastest to use 0. So we just don't allow matches with - * position 0. */ - - if (ref != 0) /* Not the root? */ - intervals[ref & POS_MASK] = nOffset; - return 0; - } - - /* Ascend indirectly via pos_data[] links. */ - match_pos = super_ref; - matchptr = pMatches; - for (;;) { - while ((super_ref = pos_data[match_pos]) > ref) - match_pos = intervals[super_ref & POS_MASK]; - intervals[ref & POS_MASK] = nOffset; - pos_data[match_pos] = ref; - - if ((matchptr - pMatches) < nMaxMatches) { - int nMatchOffset = (int)(nOffset - match_pos); - - if (nMatchOffset <= MAX_OFFSET) { - matchptr->length = (unsigned short)(ref >> LCP_SHIFT); - matchptr->offset = (unsigned short)nMatchOffset; - matchptr++; - } - } - - if (super_ref == 0) - break; - ref = super_ref; - match_pos = intervals[ref & POS_MASK]; - } - - return (int)(matchptr - pMatches); -} - -/** - * Skip previously compressed bytes - * - * @param pCompressor compression context - * @param nStartOffset current offset in input window (typically 0) - * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) - */ -static void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { - lzsa_match match; - int i; - - /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However, - * we don't store the matches. */ - for (i = nStartOffset; i < nEndOffset; i++) { - lzsa_find_matches_at(pCompressor, i, &match, 0); - } -} - -/** - * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for - * the optimizer to look at. 
- * - * @param pCompressor compression context - * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) - * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes - */ -static void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { - lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT); - int i; - - for (i = nStartOffset; i < nEndOffset; i++) { - int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, NMATCHES_PER_OFFSET); - int m; - - for (m = 0; m < NMATCHES_PER_OFFSET; m++) { - if (nMatches <= m || i > (nEndOffset - LAST_MATCH_OFFSET)) { - pMatch->length = 0; - pMatch->offset = 0; - } - else { - int nMaxLen = (nEndOffset - LAST_LITERALS) - i; - if (nMaxLen < 0) - nMaxLen = 0; - if (pMatch->length > nMaxLen) - pMatch->length = (unsigned short)nMaxLen; - } - - pMatch++; - } - } -} - -/** - * Get the number of extra bits required to represent a literals length - * - * @param nLength literals length - * - * @return number of extra bits required - */ -static inline int lzsa_get_literals_varlen_size(const int nLength) { - if (nLength < LITERALS_RUN_LEN) { - return 0; - } - else { - if (nLength < 256) - return 8; - else { - if (nLength < 512) - return 16; - else - return 24; - } - } -} - -/** - * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough - * room to write the bytes. - * - * @param pOutData pointer to output buffer - * @param nOutOffset current write index into output buffer - * @param nLength literals length - */ -static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) { - if (nLength >= LITERALS_RUN_LEN) { - if (nLength < 256) - pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN; - else { - if (nLength < 512) { - pOutData[nOutOffset++] = 250; - pOutData[nOutOffset++] = nLength - 256; - } - else { - pOutData[nOutOffset++] = 249; - pOutData[nOutOffset++] = nLength & 0xff; - pOutData[nOutOffset++] = (nLength >> 8) & 0xff; - } - } - } - - return nOutOffset; -} - -/** - * Get the number of extra bits required to represent an encoded match length - * - * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE) - * - * @return number of extra bits required - */ -static inline int lzsa_get_match_varlen_size(const int nLength) { - if (nLength < MATCH_RUN_LEN) { - return 0; - } - else { - if ((nLength + MIN_MATCH_SIZE) < 256) - return 8; - else { - if ((nLength + MIN_MATCH_SIZE) < 512) - return 16; - else - return 24; - } - } -} - -/** - * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough - * room to write the bytes. 
- * - * @param pOutData pointer to output buffer - * @param nOutOffset current write index into output buffer - * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE) - */ -static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) { - if (nLength >= MATCH_RUN_LEN) { - if ((nLength + MIN_MATCH_SIZE) < 256) - pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN; - else { - if ((nLength + MIN_MATCH_SIZE) < 512) { - pOutData[nOutOffset++] = 239; - pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE - 256; - } - else { - pOutData[nOutOffset++] = 238; - pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE) & 0xff; - pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE) >> 8) & 0xff; - } - } - } - - return nOutOffset; -} - -/** - * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input - * - * @param pCompressor compression context - * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) - * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes - */ -static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { - int *cost = (int*)pCompressor->pos_data; /* Reuse */ - int nLastLiteralsOffset; - int nMinMatchSize = pCompressor->min_match_size; - const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; - int i; - - cost[nEndOffset - 1] = 8; - nLastLiteralsOffset = nEndOffset; - - for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) { - int nBestCost, nBestMatchLen, nBestMatchOffset; - - int nLiteralsLen = nLastLiteralsOffset - i; - nBestCost = 8 + cost[i + 1]; - if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == 256 || nLiteralsLen == 512) { - /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. - * The cost automatically accumulates down the chain. */ - nBestCost += 8; - } - if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) - nBestCost += MODESWITCH_PENALTY; - nBestMatchLen = 0; - nBestMatchOffset = 0; - - lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); - int m; - - for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) { - int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 
8 : 16; - - if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) { - int nCurCost; - int nMatchLen = pMatch[m].length; - - if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) - nMatchLen = nEndOffset - LAST_LITERALS - i; - - nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE); - nCurCost += cost[i + nMatchLen]; - if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) - nCurCost += MODESWITCH_PENALTY; - - if (nBestCost > (nCurCost - nFavorRatio)) { - nBestCost = nCurCost; - nBestMatchLen = nMatchLen; - nBestMatchOffset = pMatch[m].offset; - } - } - else { - int nMatchLen = pMatch[m].length; - int k, nMatchRunLen; - - if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) - nMatchLen = nEndOffset - LAST_LITERALS - i; - - nMatchRunLen = nMatchLen; - if (nMatchRunLen > MATCH_RUN_LEN) - nMatchRunLen = MATCH_RUN_LEN; - - for (k = nMinMatchSize; k < nMatchRunLen; k++) { - int nCurCost; - - nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */; - nCurCost += cost[i + k]; - if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) - nCurCost += MODESWITCH_PENALTY; - - if (nBestCost > (nCurCost - nFavorRatio)) { - nBestCost = nCurCost; - nBestMatchLen = k; - nBestMatchOffset = pMatch[m].offset; - } - } - - for (; k <= nMatchLen; k++) { - int nCurCost; - - nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE); - nCurCost += cost[i + k]; - if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) - nCurCost += MODESWITCH_PENALTY; - - if (nBestCost > (nCurCost - nFavorRatio)) { - nBestCost = nCurCost; - nBestMatchLen = k; - nBestMatchOffset = pMatch[m].offset; - } - } - } - } - - if (nBestMatchLen >= MIN_MATCH_SIZE) - nLastLiteralsOffset = i; - - cost[i] = nBestCost; - pMatch->length = nBestMatchLen; - pMatch->offset = nBestMatchOffset; - } -} - -/** - * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without - * impacting the compression ratio - * - * @param pCompressor compression context - * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) - * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes - * - * @return non-zero if the number of tokens was reduced, 0 if it wasn't - */ -static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { - int i; - int nNumLiterals = 0; - int nDidReduce = 0; - - for (i = nStartOffset; i < nEndOffset; ) { - lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); - - if (pMatch->length >= MIN_MATCH_SIZE) { - int nMatchLen = pMatch->length; - int nReduce = 0; - - if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: */ { - int nMatchOffset = pMatch->offset; - int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen); - - if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) { - if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nMatchLen))) { - /* This command is a match; the next command is also a match. 
The next command currently has no literals; replacing this command by literals will - * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is - * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current - * match command by literals, the output size will not increase and it will remove one command. */ - nReduce = 1; - } - } - else { - int nCurIndex = i + nMatchLen; - int nNextNumLiterals = 0; - - do { - nCurIndex++; - nNextNumLiterals++; - } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE); - - if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size(nNextNumLiterals))) { - /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take - * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */ - nReduce = 1; - } - } - } - - if (nReduce) { - int j; - - for (j = 0; j < nMatchLen; j++) { - pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0; - } - nNumLiterals += nMatchLen; - i += nMatchLen; - - nDidReduce = 1; - } - else { - if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX && - pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 && - (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) { - /* Join */ - - pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length; - pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0; - pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1; - continue; - } - - nNumLiterals = 0; - i += nMatchLen; - } - } - else { - nNumLiterals++; - i++; - } - } - - return nDidReduce; -} - -/** - * Emit block of compressed data - * - * @param pCompressor compression context - * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) - * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) - * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes - * @param pOutData pointer to output buffer - * @param nMaxOutDataSize maximum size of output buffer, in bytes - * - * @return size of compressed data in output buffer, or -1 if the data is uncompressible - */ -static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { - int i; - int nNumLiterals = 0; - int nInFirstLiteralOffset = 0; - int nOutOffset = 0; - - for (i = nStartOffset; i < nEndOffset; ) { - lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); - - if (pMatch->length >= MIN_MATCH_SIZE) { - int nMatchOffset = pMatch->offset; - int nMatchLen = pMatch->length; - int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE; - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals; - int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? 
MATCH_RUN_LEN : nEncodedMatchLen; - int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen); - - if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) - return -1; - if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET) - return -1; - - pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen; - nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals); - - if (nNumLiterals != 0) { - memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); - nOutOffset += nNumLiterals; - nNumLiterals = 0; - } - - pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; - if (nTokenLongOffset) { - pOutData[nOutOffset++] = (-nMatchOffset) >> 8; - } - nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen); - i += nMatchLen; - - pCompressor->num_commands++; - } - else { - if (nNumLiterals == 0) - nInFirstLiteralOffset = i; - nNumLiterals++; - i++; - } - } - - { - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3); - - if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) - return -1; - - if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) - pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f; - else - pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00; - nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals); - - if (nNumLiterals != 0) { - memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); - nOutOffset += nNumLiterals; - nNumLiterals = 0; - } - - pCompressor->num_commands++; - } - - if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { - /* Emit EOD marker for raw block */ - - if ((nOutOffset + 4) > nMaxOutDataSize) - return -1; - - pOutData[nOutOffset++] = 0; - pOutData[nOutOffset++] = 238; - pOutData[nOutOffset++] = 0; - pOutData[nOutOffset++] = 0; - } - - return nOutOffset; -} - -/** - * Compress one block of data - * - * @param pCompressor compression context - * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) - * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none) - * @param nInDataSize number of input bytes to compress - * @param pOutData pointer to output buffer - * @param nMaxOutDataSize maximum size of output buffer, in bytes - * - * @return size of compressed data in output buffer, or -1 if the data is uncompressible - */ -int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { - if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) - return -1; - if (nPreviousBlockSize) { - lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize); - } - lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); - lzsa_optimize_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); - - int nDidReduce; - int nPasses = 0; - do { - nDidReduce = lzsa_optimize_command_count(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); - nPasses++; - } while (nDidReduce && nPasses < 20); - - return lzsa_write_block(pCompressor, pInWindow, 
nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize); -} - -/** - * Get the number of compression commands issued in compressed data blocks - * - * @return number of commands - */ -int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) { - return pCompressor->num_commands; -} diff --git a/src/shrink_v1.c b/src/shrink_v1.c new file mode 100644 index 0000000..1ca3918 --- /dev/null +++ b/src/shrink_v1.c @@ -0,0 +1,460 @@ +/* + * shrink_v1.c - LZSA1 block compressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "lib.h" +#include "shrink_v1.h" +#include "format.h" + +/** + * Get the number of extra bits required to represent a literals length + * + * @param nLength literals length + * + * @return number of extra bits required + */ +static inline int lzsa_get_literals_varlen_size_v1(const int nLength) { + if (nLength < LITERALS_RUN_LEN_V1) { + return 0; + } + else { + if (nLength < 256) + return 8; + else { + if (nLength < 512) + return 16; + else + return 24; + } + } +} + +/** + * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. 
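
To make the three-tier length encoding implemented by these helpers concrete, the standalone fragment below restates the literals extra-byte emission and prints what gets written for a few run lengths. The value 7 for LITERALS_RUN_LEN_V1 comes from format.h, which is not part of this patch, so it is assumed here.

    #include <stdio.h>

    #define LITERALS_RUN_LEN_V1 7   /* assumed value from format.h (not shown in this patch) */

    /* Standalone restatement of the extra-byte emission, for illustration only */
    static int emit_literals_len(unsigned char *out, int pos, int len) {
        if (len >= LITERALS_RUN_LEN_V1) {
            if (len < 256)
                out[pos++] = (unsigned char)(len - LITERALS_RUN_LEN_V1);
            else if (len < 512) {
                out[pos++] = 250;
                out[pos++] = (unsigned char)(len - 256);
            }
            else {
                out[pos++] = 249;
                out[pos++] = (unsigned char)(len & 0xff);
                out[pos++] = (unsigned char)((len >> 8) & 0xff);
            }
        }
        return pos;
    }

    int main(void) {
        int lengths[] = { 5, 40, 300, 700 }, i, j;
        for (i = 0; i < 4; i++) {
            unsigned char buf[3];
            int count = emit_literals_len(buf, 0, lengths[i]);
            printf("literals run %3d -> token LL field %d, %d extra byte(s):", lengths[i],
                   lengths[i] >= LITERALS_RUN_LEN_V1 ? LITERALS_RUN_LEN_V1 : lengths[i], count);
            for (j = 0; j < count; j++) printf(" %d", buf[j]);
            printf("\n");
        }
        return 0;
    }

The match-length helpers follow the same pattern, with 239 and 238 as the two marker bytes and lengths biased by MIN_MATCH_SIZE_V1.
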
+ * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength literals length + */ +static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) { + if (nLength >= LITERALS_RUN_LEN_V1) { + if (nLength < 256) + pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1; + else { + if (nLength < 512) { + pOutData[nOutOffset++] = 250; + pOutData[nOutOffset++] = nLength - 256; + } + else { + pOutData[nOutOffset++] = 249; + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent an encoded match length + * + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1) + * + * @return number of extra bits required + */ +static inline int lzsa_get_match_varlen_size_v1(const int nLength) { + if (nLength < MATCH_RUN_LEN_V1) { + return 0; + } + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 256) + return 8; + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 512) + return 16; + else + return 24; + } + } +} + +/** + * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1) + */ +static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) { + if (nLength >= MATCH_RUN_LEN_V1) { + if ((nLength + MIN_MATCH_SIZE_V1) < 256) + pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1; + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 512) { + pOutData[nOutOffset++] = 239; + pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256; + } + else { + pOutData[nOutOffset++] = 238; + pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff; + pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + */ +static void lzsa_optimize_matches_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int *cost = (int*)pCompressor->pos_data; /* Reuse */ + int nLastLiteralsOffset; + int nMinMatchSize = pCompressor->min_match_size; + const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; + int i; + + cost[nEndOffset - 1] = 8; + nLastLiteralsOffset = nEndOffset; + + for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) { + int nBestCost, nBestMatchLen, nBestMatchOffset; + + int nLiteralsLen = nLastLiteralsOffset - i; + nBestCost = 8 + cost[i + 1]; + if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) { + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. + * The cost automatically accumulates down the chain. 
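
The boundary check in this backward pass (nLiteralsLen equal to LITERALS_RUN_LEN_V1, 256 or 512) can be sanity-checked against the size helper defined earlier in this file: those are exactly the run lengths at which the extra-byte cost steps up by another 8 bits. A tiny check, again assuming LITERALS_RUN_LEN_V1 is 7 as defined in format.h:

    #include <stdio.h>

    #define LITERALS_RUN_LEN_V1 7   /* assumed from format.h (not shown in this patch) */

    /* Same rule as lzsa_get_literals_varlen_size_v1, restated for illustration */
    static int literals_varlen_bits(int len) {
        if (len < LITERALS_RUN_LEN_V1) return 0;
        if (len < 256) return 8;
        if (len < 512) return 16;
        return 24;
    }

    int main(void) {
        int k;
        for (k = 1; k < 1024; k++) {
            int step = literals_varlen_bits(k) - literals_varlen_bits(k - 1);
            if (step)
                printf("literals run cost grows by %d bits when the run reaches %d\n", step, k);
        }
        return 0;
    }
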
*/ + nBestCost += 8; + } + if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nBestCost += MODESWITCH_PENALTY; + nBestMatchLen = 0; + nBestMatchOffset = 0; + + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + int m; + + for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) { + int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16; + + if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) { + int nCurCost; + int nMatchLen = pMatch[m].length; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1); + nCurCost += cost[i + nMatchLen]; + if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = nMatchLen; + nBestMatchOffset = pMatch[m].offset; + } + } + else { + int nMatchLen = pMatch[m].length; + int k, nMatchRunLen; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nMatchRunLen = nMatchLen; + if (nMatchRunLen > MATCH_RUN_LEN_V1) + nMatchRunLen = MATCH_RUN_LEN_V1; + + for (k = nMinMatchSize; k < nMatchRunLen; k++) { + int nCurCost; + + nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */; + nCurCost += cost[i + k]; + if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + } + } + + for (; k <= nMatchLen; k++) { + int nCurCost; + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1); + nCurCost += cost[i + k]; + if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + } + } + } + } + + if (nBestMatchLen >= MIN_MATCH_SIZE_V1) + nLastLiteralsOffset = i; + + cost[i] = nBestCost; + pMatch->length = nBestMatchLen; + pMatch->offset = nBestMatchOffset; + } +} + +/** + * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without + * impacting the compression ratio + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * + * @return non-zero if the number of tokens was reduced, 0 if it wasn't + */ +static int lzsa_optimize_command_count_v1(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int i; + int nNumLiterals = 0; + int nDidReduce = 0; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + + if (pMatch->length >= MIN_MATCH_SIZE_V1) { + int nMatchLen = pMatch->length; + int nReduce = 0; + + if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: */ { + int nMatchOffset = pMatch->offset; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + int nCommandSize = 8 /* token */ + 
lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + + if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) { + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) { + /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will + * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is + * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current + * match command by literals, the output size will not increase and it will remove one command. */ + nReduce = 1; + } + } + else { + int nCurIndex = i + nMatchLen; + int nNextNumLiterals = 0; + + do { + nCurIndex++; + nNextNumLiterals++; + } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1); + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) { + /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take + * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */ + nReduce = 1; + } + } + } + + if (nReduce) { + int j; + + for (j = 0; j < nMatchLen; j++) { + pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0; + } + nNumLiterals += nMatchLen; + i += nMatchLen; + + nDidReduce = 1; + } + else { + if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX && + pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 && + (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_OFFSET) { + /* Join */ + + pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length; + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0; + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1; + continue; + } + + nNumLiterals = 0; + i += nMatchLen; + } + } + else { + nNumLiterals++; + i++; + } + } + + return nDidReduce; +} + +/** + * Emit block of compressed data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int lzsa_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { + int i; + int nNumLiterals = 0; + int nInFirstLiteralOffset = 0; + int nOutOffset = 0; + + for (i = nStartOffset; i < 
nEndOffset; ) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + + if (pMatch->length >= MIN_MATCH_SIZE_V1) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen; + int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + + if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) + return -1; + if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET) + return -1; + + pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen; + nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + if (nTokenLongOffset) { + pOutData[nOutOffset++] = (-nMatchOffset) >> 8; + } + nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen); + i += nMatchLen; + + pCompressor->num_commands++; + } + else { + if (nNumLiterals == 0) + nInFirstLiteralOffset = i; + nNumLiterals++; + i++; + } + } + + { + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); + + if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) + return -1; + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) + pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f; + else + pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00; + nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pCompressor->num_commands++; + } + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { + /* Emit EOD marker for raw block */ + + if ((nOutOffset + 4) > nMaxOutDataSize) + return -1; + + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = 238; + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = 0; + } + + return nOutOffset; +} + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { 
+   lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+
+   int nDidReduce;
+   int nPasses = 0;
+   do {
+      nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+      nPasses++;
+   } while (nDidReduce && nPasses < 20);
+
+   return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
+}
diff --git a/src/shrink_v1.h b/src/shrink_v1.h
new file mode 100644
index 0000000..aa1a7d5
--- /dev/null
+++ b/src/shrink_v1.h
@@ -0,0 +1,53 @@
+/*
+ * shrink_v1.h - LZSA1 block compressor definitions
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_V1_H
+#define _SHRINK_V1_H
+
+/* Forward declarations */
+typedef struct _lsza_compressor lsza_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
+ * @param nInDataSize number of new input bytes to compress, following the previously compressed bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v1(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_V1_H */
diff --git a/src/shrink_v2.c b/src/shrink_v2.c
new file mode 100644
index 0000000..27b8d76
--- /dev/null
+++ b/src/shrink_v2.c
@@ -0,0 +1,733 @@
+/*
+ * shrink_v2.c - LZSA2 block compressor implementation
+ *
+ * Copyright (C) 2019 Emmanuel Marty
+ *
+ * This software is provided 'as-is', without any express or implied
+ * warranty. In no event will the authors be held liable for any damages
+ * arising from the use of this software.
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "lib.h" +#include "shrink_v2.h" +#include "format.h" + +/** + * Write 4-bit nibble to output (compressed) buffer + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles + * @param nCurFreeNibbles current number of free nibbles in byte + * @param nNibbleValue value to write (0..15) + */ +static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) { + if (nOutOffset < 0) return -1; + + if ((*nCurNibbleOffset) == -1) { + if (nOutOffset >= nMaxOutDataSize) return -1; + (*nCurNibbleOffset) = nOutOffset; + (*nCurFreeNibbles) = 2; + pOutData[nOutOffset++] = 0; + } + + pOutData[*nCurNibbleOffset] = (pOutData[*nCurNibbleOffset] << 4) | (nNibbleValue & 0x0f); + (*nCurFreeNibbles)--; + if ((*nCurFreeNibbles) == 0) { + (*nCurNibbleOffset) = -1; + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent a literals length + * + * @param nLength literals length + * + * @return number of extra bits required + */ +static inline int lzsa_get_literals_varlen_size_v2(const int nLength) { + if (nLength < LITERALS_RUN_LEN_V2) { + return 0; + } + else { + if (nLength < (LITERALS_RUN_LEN_V2 + 15)) { + return 4; + } + else { + if (nLength < 256) + return 4+8; + else { + return 4+24; + } + } + } +} + +/** + * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. 
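+ * Illustration, assuming LITERALS_RUN_LEN_V2 == 3 as used by the LZSA2 token layout: the token
+ * already holds 3, so a run of 10 literals emits a single extra nibble of 7 (10 - 3), while a
+ * run of 100 emits the nibble 15 followed by the byte value 100.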
+ * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength literals length + */ +static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) { + if (nLength >= LITERALS_RUN_LEN_V2) { + if (nLength < (LITERALS_RUN_LEN_V2 + 15)) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2); + } + else { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15); + if (nOutOffset < 0) return -1; + + if (nLength < 256) + pOutData[nOutOffset++] = nLength; + else { + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent an encoded match length + * + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2) + * + * @return number of extra bits required + */ +static inline int lzsa_get_match_varlen_size_v2(const int nLength) { + if (nLength < MATCH_RUN_LEN_V2) { + return 0; + } + else { + if (nLength < (MATCH_RUN_LEN_V2 + 15)) + return 4; + else { + if ((nLength + MIN_MATCH_SIZE_V2) < 256) + return 4+8; + else { + return 4 + 24; + } + } + } +} + +/** + * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2) + */ +static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) { + if (nLength >= MATCH_RUN_LEN_V2) { + if (nLength < (MATCH_RUN_LEN_V2 + 15)) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2); + } + else { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15); + if (nOutOffset < 0) return -1; + + if ((nLength + MIN_MATCH_SIZE_V2) < 256) + pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2; + else { + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff; + pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + */ +static void lzsa_optimize_matches_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int *cost = (int*)pCompressor->pos_data; /* Reuse */ + int *prev_match = (int*)pCompressor->intervals; /* Reuse */ + lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + lzsa_match *pBestMatch = pCompressor->best_match; + int nLastLiteralsOffset; + int nMinMatchSize = pCompressor->min_match_size; + const int nFavorRatio = (pCompressor->flags & 
LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; + int i; + + cost[nEndOffset - 1] = 8; + prev_match[nEndOffset - 1] = nEndOffset; + nLastLiteralsOffset = nEndOffset; + + pCompressor->best_match[nEndOffset - 1].length = 0; + pCompressor->best_match[nEndOffset - 1].offset = 0; + + repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1; + repmatch_opt[nEndOffset - 1].incoming_offset = -1; + repmatch_opt[nEndOffset - 1].expected_repmatch = 0; + + for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) { + int nLiteralsCost; + + int nLiteralsLen = nLastLiteralsOffset - i; + nLiteralsCost = 8 + cost[i + 1]; + if (nLiteralsLen == LITERALS_RUN_LEN_V2) { + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. + * The cost automatically accumulates down the chain. */ + nLiteralsCost += 4; + } + else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) { + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. + * The cost automatically accumulates down the chain. */ + nLiteralsCost += 8; + } + else if (nLiteralsLen == 256) { + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. + * The cost automatically accumulates down the chain. */ + nLiteralsCost += 16; + } + if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2) + nLiteralsCost += MODESWITCH_PENALTY; + + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT); + int m; + + cost[i] = nLiteralsCost; + pCompressor->best_match[i].length = 0; + pCompressor->best_match[i].offset = 0; + + repmatch_opt[i].best_slot_for_incoming = -1; + repmatch_opt[i].incoming_offset = -1; + repmatch_opt[i].expected_repmatch = 0; + + for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) { + int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch; + + nBestCost = nLiteralsCost; + nBestMatchLen = 0; + nBestMatchOffset = 0; + nBestUpdatedSlot = -1; + nBestUpdatedIndex = -1; + nBestExpectedRepMatch = 0; + + if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) { + int nCurCost; + int nMatchLen = pMatch[m].length; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + int nCurIndex = prev_match[i + nMatchLen]; + + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 
12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2); + nCurCost += cost[i + nMatchLen]; + if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = nMatchLen; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = -1; + nBestUpdatedIndex = -1; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + else { + int nMatchLen = pMatch[m].length; + int k, nMatchRunLen; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nMatchRunLen = nMatchLen; + if (nMatchRunLen > MATCH_RUN_LEN_V2) + nMatchRunLen = MATCH_RUN_LEN_V2; + + for (k = nMinMatchSize; k < nMatchRunLen; k++) { + int nCurCost; + + int nCurIndex = prev_match[i + k]; + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */; + nCurCost += cost[i + k]; + if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + int nCurUpdatedSlot = -1; + int nCurUpdatedIndex = -1; + + if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) { + int r; + + for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) { + if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) { + int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex]; + + if (nAltCost <= nCurCost) { + nCurUpdatedSlot = r; + nCurUpdatedIndex = nCurIndex; + nCurCost = nAltCost; + nCurExpectedRepMatch = 2; + } + } + } + } + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = nCurUpdatedSlot; + nBestUpdatedIndex = nCurUpdatedIndex; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + + for (; k <= nMatchLen; k++) { + int nCurCost; + + int nCurIndex = prev_match[i + k]; + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 
12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2); + nCurCost += cost[i + k]; + if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + int nCurUpdatedSlot = -1; + int nCurUpdatedIndex = -1; + + if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) { + int r; + + for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) { + if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) { + int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex]; + + if (nAltCost <= nCurCost) { + nCurUpdatedSlot = r; + nCurUpdatedIndex = nCurIndex; + nCurCost = nAltCost; + nCurExpectedRepMatch = 2; + } + } + } + } + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = nCurUpdatedSlot; + nBestUpdatedIndex = nCurUpdatedIndex; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + } + + pSlotCost[m] = nBestCost; + pMatch[m].length = nBestMatchLen; + pMatch[m].offset = nBestMatchOffset; /* not necessary */ + + if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) { + cost[i] = nBestCost; + pCompressor->best_match[i].length = nBestMatchLen; + pCompressor->best_match[i].offset = nBestMatchOffset; + + repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch; + + if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) { + repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot; + repmatch_opt[nBestUpdatedIndex].incoming_offset = i; + } + } + } + for (; m < NMATCHES_PER_OFFSET; m++) { + pSlotCost[m] = 0; + } + + if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) + nLastLiteralsOffset = i; + + prev_match[i] = nLastLiteralsOffset; + } + + int nIncomingOffset = -1; + for (i = nStartOffset; i < nEndOffset; ) { + if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) { + if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming; + int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming; + + pCompressor->best_match[i].length = pMatch->length; + pCompressor->best_match[i].offset = pMatch->offset; + cost[i] = *pSlotCost; + + if (repmatch_opt[i].expected_repmatch == 2) + repmatch_opt[i].expected_repmatch = 1; + } + else { + if (repmatch_opt[i].expected_repmatch == 2) + repmatch_opt[i].expected_repmatch = 0; + } + + nIncomingOffset = i; + i += pCompressor->best_match[i].length; + } + else { + i++; + } + } +} + +/** + * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without + * impacting the compression ratio + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * + * @return non-zero if the number of tokens was reduced, 0 if it wasn't + */ +static int 
lzsa_optimize_command_count_v2(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int i; + int nNumLiterals = 0; + int nDidReduce = 0; + int nPreviousMatchOffset = -1; + lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->best_match + i; + + if (pMatch->length >= MIN_MATCH_SIZE_V2) { + int nMatchLen = pMatch->length; + int nReduce = 0; + int nCurrentMatchOffset = i; + + if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: */ { + int nMatchOffset = pMatch->offset; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2; + int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16; + + if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) { + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost; + + if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) { + nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */; + } + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) { + /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will + * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is + * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current + * match command by literals, the output size will not increase and it will remove one command. */ + nReduce = 1; + } + } + else { + int nCurIndex = i + nMatchLen; + int nNextNumLiterals = 0; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;; + + do { + nCurIndex++; + nNextNumLiterals++; + } while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2); + + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != nMatchOffset) { + nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */; + } + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) { + /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take + * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. 
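+                   * For instance, under this cost model a new offset in the 513..8704 range counts as 12 extra
+                   * bits, so a two-byte match using it costs at least 8 (token) + 12 = 20 bits, whereas emitting
+                   * those two bytes as literals costs 16 bits plus any growth of the literals length field,
+                   * making the literal encoding the usual winner.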
*/ + nReduce = 1; + } + } + } + + if (nReduce) { + int j; + + for (j = 0; j < nMatchLen; j++) { + pCompressor->best_match[i + j].length = 0; + } + nNumLiterals += nMatchLen; + i += nMatchLen; + + nDidReduce = 1; + + if (nPreviousMatchOffset >= 0) { + repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0; + nPreviousMatchOffset = -1; + } + } + else { + if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX && + pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 && + (nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_OFFSET) { + /* Join */ + + pMatch->length += pCompressor->best_match[i + nMatchLen].length; + pCompressor->best_match[i + nMatchLen].offset = 0; + pCompressor->best_match[i + nMatchLen].length = -1; + continue; + } + + nNumLiterals = 0; + i += nMatchLen; + } + + nPreviousMatchOffset = nCurrentMatchOffset; + } + else { + nNumLiterals++; + i++; + } + } + + return nDidReduce; +} + +/** + * Emit block of compressed data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int lzsa_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { + int i; + int nNumLiterals = 0; + int nInFirstLiteralOffset = 0; + int nOutOffset = 0; + int nCurNibbleOffset = -1, nCurFreeNibbles = 0; + int nRepMatchOffset = 0; + lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->best_match + i; + + if (pMatch->length >= MIN_MATCH_SIZE_V2) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2; + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals; + int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? 
MATCH_RUN_LEN_V2 : nEncodedMatchLen; + int nTokenOffsetMode; + int nOffsetSize; + + if (nMatchOffset == nRepMatchOffset) { + nTokenOffsetMode = 0xe0; + nOffsetSize = 0; + } + else { + if (nMatchOffset <= 32) { + nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1); + nOffsetSize = 4; + } + else if (nMatchOffset <= 512) { + nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3); + nOffsetSize = 8; + } + else if (nMatchOffset <= (8192 + 512)) { + nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7); + nOffsetSize = 12; + } + else { + nTokenOffsetMode = 0xc0; + nOffsetSize = 16; + } + } + + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen); + + if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize) + return -1; + if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET) + return -1; + + pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen; + nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); + if (nOutOffset < 0) return -1; + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f); + if (nOutOffset < 0) return -1; + } + else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) { + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + } + else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) { + pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff; + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f); + if (nOutOffset < 0) return -1; + } + else if (nTokenOffsetMode == 0xc0) { + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + pOutData[nOutOffset++] = (-nMatchOffset) >> 8; + } + nRepMatchOffset = nMatchOffset; + + nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen); + if (nOutOffset < 0) return -1; + + i += nMatchLen; + + pCompressor->num_commands++; + } + else { + if (nNumLiterals == 0) + nInFirstLiteralOffset = i; + nNumLiterals++; + i++; + } + } + + { + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? 
LITERALS_RUN_LEN_V2 : nNumLiterals; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3); + + if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize) + return -1; + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) + pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47; + else + pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00; + nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); + if (nOutOffset < 0) return -1; + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pCompressor->num_commands++; + } + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { + /* Emit EOD marker for raw block */ + + if (nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = 0; /* Match offset */ + + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15); /* Extended match length nibble */ + if (nOutOffset < 0) return -1; + + if ((nOutOffset + 3) > nMaxOutDataSize) + return -1; + + pOutData[nOutOffset++] = 0; /* Extended match length byte */ + pOutData[nOutOffset++] = 0; /* 16-bit match length */ + pOutData[nOutOffset++] = 0; + } + + if (nCurNibbleOffset != -1) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0); + if (nOutOffset < 0 || nCurNibbleOffset != -1) + return -1; + } + + return nOutOffset; +} + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { + lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + + int nDidReduce; + int nPasses = 0; + do { + nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + nPasses++; + } while (nDidReduce && nPasses < 20); + + return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize); +} diff --git a/src/shrink_v2.h b/src/shrink_v2.h new file mode 100644 index 0000000..8817810 --- /dev/null +++ b/src/shrink_v2.h @@ -0,0 +1,53 @@ +/* + * shrink_v2.h - LZSA2 block compressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ *
+ * Permission is granted to anyone to use this software for any purpose,
+ * including commercial applications, and to alter it and redistribute it
+ * freely, subject to the following restrictions:
+ *
+ * 1. The origin of this software must not be misrepresented; you must not
+ *    claim that you wrote the original software. If you use this software
+ *    in a product, an acknowledgment in the product documentation would be
+ *    appreciated but is not required.
+ * 2. Altered source versions must be plainly marked as such, and must not be
+ *    misrepresented as being the original software.
+ * 3. This notice may not be removed or altered from any source distribution.
+ */
+
+/*
+ * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
+ *
+ * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
+ * With help, ideas, optimizations and speed measurements by spke
+ * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
+ * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
+ *
+ */
+
+#ifndef _SHRINK_V2_H
+#define _SHRINK_V2_H
+
+/* Forward declarations */
+typedef struct _lsza_compressor lsza_compressor;
+
+/**
+ * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
+ *
+ * @param pCompressor compression context
+ * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
+ * @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
+ * @param nInDataSize number of new input bytes to compress, following the previously compressed bytes
+ * @param pOutData pointer to output buffer
+ * @param nMaxOutDataSize maximum size of output buffer, in bytes
+ *
+ * @return size of compressed data in output buffer, or -1 if the data is uncompressible
+ */
+int lzsa_optimize_and_write_block_v2(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
+
+#endif /* _SHRINK_V2_H */