From b4e3c07d3a0f138bb7c0cb01ca4a7358cbb2c265 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Fri, 7 Jun 2019 23:15:40 +0200 Subject: [PATCH] Split code, add automated tests, update LZSA2 --- src/dictionary.c | 101 ++++++ src/dictionary.h | 56 ++++ src/expand_block_v1.c | 217 ++++++++++++ src/expand_block_v1.h | 49 +++ src/expand_block_v2.c | 242 ++++++++++++++ src/expand_block_v2.h | 49 +++ src/expand_context.c | 57 ++++ src/expand_context.h | 51 +++ src/expand_inmem.c | 160 +++++++++ src/expand_inmem.h | 61 ++++ src/expand_streaming.c | 243 ++++++++++++++ src/expand_streaming.h | 78 +++++ src/format.h | 2 + src/frame.c | 1 + src/lib.h | 208 +----------- src/lzsa.c | 402 ++++++++++++++++++++++- src/matchfinder.c | 9 +- src/matchfinder.h | 10 +- src/shrink_block_v1.c | 459 ++++++++++++++++++++++++++ src/shrink_block_v1.h | 53 +++ src/shrink_block_v2.c | 727 +++++++++++++++++++++++++++++++++++++++++ src/shrink_block_v2.h | 53 +++ src/shrink_context.c | 194 +++++++++++ src/shrink_context.h | 123 +++++++ src/shrink_inmem.c | 178 ++++++++++ src/shrink_inmem.h | 64 ++++ src/shrink_streaming.c | 285 ++++++++++++++++ src/shrink_streaming.h | 86 +++++ 28 files changed, 4002 insertions(+), 216 deletions(-) create mode 100644 src/dictionary.c create mode 100644 src/dictionary.h create mode 100644 src/expand_block_v1.c create mode 100644 src/expand_block_v1.h create mode 100644 src/expand_block_v2.c create mode 100644 src/expand_block_v2.h create mode 100644 src/expand_context.c create mode 100644 src/expand_context.h create mode 100644 src/expand_inmem.c create mode 100644 src/expand_inmem.h create mode 100644 src/expand_streaming.c create mode 100644 src/expand_streaming.h create mode 100644 src/shrink_block_v1.c create mode 100644 src/shrink_block_v1.h create mode 100644 src/shrink_block_v2.c create mode 100644 src/shrink_block_v2.h create mode 100644 src/shrink_context.c create mode 100644 src/shrink_context.h create mode 100644 src/shrink_inmem.c create mode 100644 
src/shrink_inmem.h create mode 100644 src/shrink_streaming.c create mode 100644 src/shrink_streaming.h diff --git a/src/dictionary.c b/src/dictionary.c new file mode 100644 index 0000000..4f10cc4 --- /dev/null +++ b/src/dictionary.c @@ -0,0 +1,101 @@ +/* + * dictionary.c - dictionary implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "format.h" +#include "lib.h" + +/** + * Load dictionary contents + * + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param pDictionaryData pointer to returned dictionary contents, or NULL for none + * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) { + unsigned char *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + + if (pszDictionaryFilename) { + pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE); + if (!pDictionaryData) { + return LZSA_ERROR_MEMORY; + } + + FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); + if (!pDictionaryFile) { + free(pDictionaryData); + pDictionaryData = NULL; + return LZSA_ERROR_DICTIONARY; + } + + fseek(pDictionaryFile, 0, SEEK_END); +#ifdef _WIN32 + __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile); +#else + off_t nDictionaryFileSize = ftello(pDictionaryFile); +#endif + if (nDictionaryFileSize > BLOCK_SIZE) { + /* Use the last BLOCK_SIZE bytes of the dictionary */ + fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END); + } + else { + fseek(pDictionaryFile, 0, SEEK_SET); + } + + nDictionaryDataSize = (int)fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile); + if (nDictionaryDataSize < 0) + nDictionaryDataSize = 0; + + fclose(pDictionaryFile); + pDictionaryFile = NULL; + } + + *ppDictionaryData = pDictionaryData; + *pDictionaryDataSize = nDictionaryDataSize; + return LZSA_OK; +} + +/** + * Free dictionary contents + * + * @param pDictionaryData pointer to pointer to dictionary contents + */ +void lzsa_dictionary_free(void **ppDictionaryData) { + if (*ppDictionaryData) { + free(*ppDictionaryData); + ppDictionaryData = NULL; + } +} diff --git a/src/dictionary.h b/src/dictionary.h new 
file mode 100644 index 0000000..9e61296 --- /dev/null +++ b/src/dictionary.h @@ -0,0 +1,56 @@ +/* + * dictionary.h - dictionary definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _DICTIONARY_H +#define _DICTIONARY_H + +#include + +/** + * Load dictionary contents + * + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param pDictionaryData pointer to returned dictionary contents, or NULL for none + * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize); + +/** + * Free dictionary contents + * + * @param pDictionaryData pointer to pointer to dictionary contents + */ +void lzsa_dictionary_free(void **ppDictionaryData); + +#endif /* _DICTIONARY_H */ diff --git a/src/expand_block_v1.c b/src/expand_block_v1.c new file mode 100644 index 0000000..15f70d4 --- /dev/null +++ b/src/expand_block_v1.c @@ -0,0 +1,217 @@ +/* + * expand_v1.c - LZSA1 block decompressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "format.h" +#include "expand_block_v1.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else /* _MSC_VER */ +#define FORCE_INLINE __attribute__((always_inline)) +#endif /* _MSC_VER */ + +static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) { + unsigned int nByte; + const unsigned char *pInBlock = *ppInBlock; + + if (pInBlock < pInBlockEnd) { + nByte = *pInBlock++; + (*nLiterals) += nByte; + + if (nByte == 250) { + if (pInBlock < pInBlockEnd) { + (*nLiterals) = 256 + ((unsigned int)*pInBlock++); + } + else { + return -1; + } + } + else if (nByte == 249) { + if ((pInBlock + 1) < pInBlockEnd) { + (*nLiterals) = ((unsigned int)*pInBlock++); + (*nLiterals) |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + } + } + + *ppInBlock = pInBlock; + return 0; + } + else { + return -1; + } +} + +static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) { + unsigned int nByte; + const unsigned char *pInBlock = *ppInBlock; + + if (pInBlock < pInBlockEnd) { + nByte = *pInBlock++; + (*nMatchLen) += nByte; + + if (nByte == 239) { + if (pInBlock < pInBlockEnd) { + (*nMatchLen) = 256 + ((unsigned int)*pInBlock++); + } + else { + return -1; + } + } + else if (nByte == 238) { + if ((pInBlock + 1) < pInBlockEnd) { + (*nMatchLen) = ((unsigned int)*pInBlock++); + (*nMatchLen) |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + 
} + } + + *ppInBlock = pInBlock; + return 0; + } + else { + return -1; + } +} + +/** + * Decompress one LZSA1 data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { + const unsigned char *pInBlockEnd = pInBlock + nBlockSize; + unsigned char *pCurOutData = pOutData + nOutDataOffset; + const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; + const unsigned char *pOutDataFastEnd = pOutDataEnd - 18; + + while (pInBlock < pInBlockEnd) { + const unsigned char token = *pInBlock++; + unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4); + + if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pInBlock, 8); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + if (nLiterals == LITERALS_RUN_LEN_V1) { + if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals)) + return -1; + } + + if (nLiterals != 0) { + if ((pInBlock + nLiterals) <= pInBlockEnd && + (pCurOutData + nLiterals) <= pOutDataEnd) { + memcpy(pCurOutData, pInBlock, nLiterals); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + return -1; + } + } + } + + if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ + unsigned int nMatchOffset; + + nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff; + if (token & 0x80) { + 
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00; + } + nMatchOffset++; + + const unsigned char *pSrc = pCurOutData - nMatchOffset; + if (pSrc >= pOutData) { + unsigned int nMatchLen = (unsigned int)(token & 0x0f); + if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 8); + memcpy(pCurOutData + 16, pSrc + 16, 2); + pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen); + } + else { + nMatchLen += MIN_MATCH_SIZE_V1; + if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) { + if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen)) + return -1; + } + + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + memcpy(pCopyDst, pCopySrc, 16); + pCopySrc += 16; + pCopyDst += 16; + } while (pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + } + } + else { + return -1; + } + } + } + + return (int)(pCurOutData - (pOutData + nOutDataOffset)); +} diff --git a/src/expand_block_v1.h b/src/expand_block_v1.h new file mode 100644 index 0000000..ac801ca --- /dev/null +++ b/src/expand_block_v1.h @@ -0,0 +1,49 @@ +/* + * expand_v1.h - LZSA1 block decompressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. 
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_V1_H +#define _EXPAND_V1_H + +/** + * Decompress one LZSA1 data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); + +#endif /* _EXPAND_V1_H */ diff --git a/src/expand_block_v2.c b/src/expand_block_v2.c new file mode 100644 index 0000000..1e7d690 --- /dev/null +++ b/src/expand_block_v2.c @@ -0,0 +1,242 @@ +/* + * expand_v2.c - LZSA2 block decompressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "format.h" +#include "expand_block_v2.h" + +#ifdef _MSC_VER +#define FORCE_INLINE __forceinline +#else /* _MSC_VER */ +#define FORCE_INLINE __attribute__((always_inline)) +#endif /* _MSC_VER */ + +static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) { + if ((*nCurNibbles ^= 1) != 0) { + const unsigned char *pInBlock = *ppInBlock; + if (pInBlock < pInBlockEnd) { + (*nibbles) = *pInBlock++; + *ppInBlock = pInBlock; + (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4; + return 0; + } + else { + return -1; + } + } + + (*nValue) = (unsigned int)((*nibbles) & 0x0f); + return 0; +} + +static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) { + unsigned int nValue; + + if (!lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nValue)) { + (*nLength) += nValue; + + if (nValue == 15) { + const unsigned char *pInBlock = *ppInBlock; + + if (pInBlock < pInBlockEnd) { + (*nLength) += ((unsigned int)*pInBlock++); + + if ((*nLength) == 257) { + if ((pInBlock + 1) < pInBlockEnd) { + (*nLength) = ((unsigned int)*pInBlock++); + (*nLength) |= (((unsigned int)*pInBlock++) << 8); + } + else { + return -1; + } + } + } + else { + return -1; + } + + *ppInBlock = pInBlock; + } + + return 0; + } + else { + return -1; + } +} + +/** + * Decompress one LZSA2 data block + * + * 
@param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { + const unsigned char *pInBlockEnd = pInBlock + nBlockSize; + unsigned char *pCurOutData = pOutData + nOutDataOffset; + const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize; + const unsigned char *pOutDataFastEnd = pOutDataEnd - 20; + int nCurNibbles = 0; + unsigned char nibbles; + int nMatchOffset = 0; + + while (pInBlock < pInBlockEnd) { + const unsigned char token = *pInBlock++; + unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3); + + if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pInBlock, 4); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + if (nLiterals == LITERALS_RUN_LEN_V2) { + if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals)) + return -1; + } + + if (nLiterals != 0) { + if ((pInBlock + nLiterals) <= pInBlockEnd && + (pCurOutData + nLiterals) <= pOutDataEnd) { + memcpy(pCurOutData, pInBlock, nLiterals); + pInBlock += nLiterals; + pCurOutData += nLiterals; + } + else { + return -1; + } + } + } + + if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */ + unsigned char nOffsetMode = token & 0xc0; + unsigned int nValue; + + switch (nOffsetMode) { + case 0x00: + /* 5 bit offset */ + if 
(lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) + return -1; + nMatchOffset = nValue; + nMatchOffset |= ((token & 0x20) >> 1); + nMatchOffset ^= 0x1f; + nMatchOffset++; + break; + + case 0x40: + /* 9 bit offset */ + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); + nMatchOffset ^= 0x1ff; + nMatchOffset++; + break; + + case 0x80: + /* 13 bit offset */ + if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) + return -1; + nMatchOffset = (unsigned int)(*pInBlock++); + nMatchOffset |= (nValue << 8); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 7); + nMatchOffset ^= 0x1fff; + nMatchOffset += (512 + 1); + break; + + default: + /* Check if this is a 16 bit offset or a rep-match */ + if ((token & 0x20) == 0) { + /* 16 bit offset */ + nMatchOffset = (((unsigned int)(*pInBlock++)) << 8); + nMatchOffset |= (unsigned int)(*pInBlock++); + nMatchOffset ^= 0xffff; + nMatchOffset++; + } + break; + } + + const unsigned char *pSrc = pCurOutData - nMatchOffset; + if (pSrc >= pOutData) { + unsigned int nMatchLen = (unsigned int)(token & 0x07); + if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) { + memcpy(pCurOutData, pSrc, 8); + memcpy(pCurOutData + 8, pSrc + 8, 2); + pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen); + } + else { + nMatchLen += MIN_MATCH_SIZE_V2; + if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) { + if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen)) + return -1; + } + + if ((pCurOutData + nMatchLen) <= pOutDataEnd) { + /* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */ + + if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) { + const unsigned char *pCopySrc = pSrc; + unsigned char *pCopyDst = pCurOutData; + const unsigned char *pCopyEndDst = pCurOutData + nMatchLen; + + do { + memcpy(pCopyDst, pCopySrc, 
16); + pCopySrc += 16; + pCopyDst += 16; + } while (pCopyDst < pCopyEndDst); + + pCurOutData += nMatchLen; + } + else { + while (nMatchLen) { + *pCurOutData++ = *pSrc++; + nMatchLen--; + } + } + } + else { + return -1; + } + } + } + else { + return -1; + } + } + } + + return (int)(pCurOutData - (pOutData + nOutDataOffset)); +} diff --git a/src/expand_block_v2.h b/src/expand_block_v2.h new file mode 100644 index 0000000..e2c8fdb --- /dev/null +++ b/src/expand_block_v2.h @@ -0,0 +1,49 @@ +/* + * expand_v2.h - LZSA2 block decompressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_V2_H +#define _EXPAND_V2_H + +/** + * Decompress one LZSA2 data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); + +#endif /* _EXPAND_V2_H */ diff --git a/src/expand_context.c b/src/expand_context.c new file mode 100644 index 0000000..42c7fd9 --- /dev/null +++ b/src/expand_context.c @@ -0,0 +1,57 @@ +/* + * expand_context.h - decompressor context definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. 
+ */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "expand_context.h" +#include "expand_block_v1.h" +#include "expand_block_v2.h" + +/** + * Decompress one data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { + if (nFormatVersion == 1) + return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + else if (nFormatVersion == 2) + return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + else + return -1; +} diff --git a/src/expand_context.h b/src/expand_context.h new file mode 100644 index 0000000..302d261 --- /dev/null +++ b/src/expand_context.h @@ -0,0 +1,51 @@ +/* + * expand_context.h - decompressor context definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_CONTEXT_H +#define _EXPAND_CONTEXT_H + +#include + +/** + * Decompress one data block + * + * @param pInBlock pointer to compressed data + * @param nInBlockSize size of compressed data, in bytes + * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) + * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) + * @param nBlockMaxSize total size of output decompression buffer, in bytes + * + * @return size of decompressed data in bytes, or -1 for error + */ +int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); + +#endif /* _EXPAND_CONTEXT_H */ diff --git a/src/expand_inmem.c b/src/expand_inmem.c new file mode 100644 index 0000000..c5da3c8 --- /dev/null +++ b/src/expand_inmem.c @@ -0,0 +1,160 @@ +/* + * expand_inmem.c - in-memory decompression implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. 
This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "expand_inmem.h" +#include "lib.h" +#include "frame.h" + +#define BLOCK_SIZE 65536 + +/** + * Get maximum decompressed size of compressed data + * + * @param pFileData compressed data + * @param nFileSize compressed size in bytes + * + * @return maximum decompressed size + */ +size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) { + const unsigned char *pCurFileData = pFileData; + const unsigned char *pEndFileData = pCurFileData + nFileSize; + int nFormatVersion = 0; + size_t nMaxDecompressedSize = 0; + const int nHeaderSize = lzsa_get_header_size(); + + /* Check header */ + if ((pCurFileData + nHeaderSize) > pEndFileData || + lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0) + return -1; + + pCurFileData += nHeaderSize; + + while (pCurFileData < pEndFileData) { + unsigned int nBlockDataSize = 0; + int nIsUncompressed = 0; + const int nFrameSize = lzsa_get_frame_size(); + + /* Decode frame header */ + if ((pCurFileData + nFrameSize) > pEndFileData || + lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0) + return -1; + pCurFileData += nFrameSize; + + if (!nBlockDataSize) + break; + + /* Add one potentially full block to the decompressed size */ + nMaxDecompressedSize += BLOCK_SIZE; + + if ((pCurFileData + nBlockDataSize) > pEndFileData) + return -1; + + pCurFileData += nBlockDataSize; + } + + return nMaxDecompressedSize; +} + +/** + * Decompress data in memory + 
/**
 * Decompress data in memory
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param pFormatVersion pointer to format version, updated if this function is successful
 *
 * @return actual decompressed size, or (size_t)-1 for error
 */
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) {
   const unsigned char *pCurFileData = pFileData;
   const unsigned char *pEndFileData = pCurFileData + nFileSize;
   unsigned char *pCurOutBuffer = pOutBuffer;
   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
   const int nHeaderSize = lzsa_get_header_size();
   const int nFrameSize = lzsa_get_frame_size();
   int nFormatVersion = 0;
   int nPreviousBlockSize = 0;

   /* All bounds checks compare a size against the bytes remaining, so that a size read
    * from corrupted input is never added to a pointer before it has been validated. */

   /* Check header */
   if ((size_t)(pEndFileData - pCurFileData) < (size_t)nHeaderSize ||
       lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
      return (size_t)-1;

   pCurFileData += nHeaderSize;

   while (pCurFileData < pEndFileData) {
      unsigned int nBlockDataSize = 0;
      int nIsUncompressed = 0;

      /* Decode frame header */
      if ((size_t)(pEndFileData - pCurFileData) < (size_t)nFrameSize ||
          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
         return (size_t)-1;
      pCurFileData += nFrameSize;

      /* A frame with a zero block size ends the stream */
      if (!nBlockDataSize)
         break;

      /* The block payload must be entirely present in the input */
      if ((size_t)(pEndFileData - pCurFileData) < (size_t)nBlockDataSize)
         return (size_t)-1;

      if (!nIsUncompressed) {
         int nDecompressedSize;

         /* Decompress block. The previous block sits right before the write position
          * and is handed to the block decompressor as the back-reference window. */
         nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
         if (nDecompressedSize < 0)
            return (size_t)-1;

         pCurOutBuffer += nDecompressedSize;
         nPreviousBlockSize = nDecompressedSize;
      }
      else {
         /* Copy uncompressed block */
         if ((size_t)(pEndOutBuffer - pCurOutBuffer) < (size_t)nBlockDataSize)
            return (size_t)-1;
         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
         pCurOutBuffer += nBlockDataSize;

         /* A stored block is also the window for the block that follows it, exactly as
          * in the streaming decompressor; leaving the old size in place would give the
          * next compressed block a window of the wrong length. */
         nPreviousBlockSize = (int)nBlockDataSize;
      }

      pCurFileData += nBlockDataSize;
   }

   *pFormatVersion = nFormatVersion;

   /* Return the full size_t difference; do not narrow it through int */
   return (size_t)(pCurOutBuffer - pOutBuffer);
}
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_INMEM_H +#define _EXPAND_INMEM_H + +#include + +/** + * Get maximum decompressed size of compressed data + * + * @param pFileData compressed data + * @param nFileSize compressed size in bytes + * + * @return maximum decompressed size + */ +size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize); + +/** + * Decompress data in memory + * + * @param pFileData compressed data + * @param pOutBuffer buffer for decompressed data + * @param nFileSize compressed size in bytes + * @param nMaxOutBufferSize maximum capacity of decompression buffer + * @param pFormatVersion pointer to format version, updated if this function is successful + * + * @return actual decompressed size, or -1 for error + */ +size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion); + +#endif /* _EXPAND_INMEM_H */ diff --git a/src/expand_streaming.c b/src/expand_streaming.c new file mode 100644 index 0000000..d4b04ba --- /dev/null +++ b/src/expand_streaming.c @@ -0,0 +1,243 @@ +/* + * expand_streaming.c - streaming decompression definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. 
Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + + +#include +#include +#include "expand_streaming.h" +#include "format.h" +#include "frame.h" +#include "lib.h" + +/*-------------- File API -------------- */ + +/** + * Decompress file + * + * @param pszInFilename name of input(compressed) file to decompress + * @param pszOutFilename name of output(decompressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize) { + lzsa_stream_t inStream, outStream; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { + return LZSA_ERROR_SRC; + } + + if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) { + inStream.close(&inStream); + return LZSA_ERROR_DST; + } + + nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize); + if (nStatus) { + outStream.close(&outStream); + inStream.close(&inStream); + return nStatus; + } + + nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize); + + lzsa_dictionary_free(&pDictionaryData); + outStream.close(&outStream); + inStream.close(&inStream); + + return nStatus; +} + +/*-------------- Streaming API -------------- */ + +/** + * Decompress stream + * + * @param pInStream input(compressed) stream to decompress + * @param pOutStream output(decompressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize) { + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + unsigned char cFrameData[16]; + unsigned char *pInBlock; + unsigned char *pOutData; + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + const int nHeaderSize = lzsa_get_header_size(); + + memset(cFrameData, 0, 16); + if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) { + return LZSA_ERROR_SRC; + } + + if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { + return LZSA_ERROR_FORMAT; + } + + nCompressedSize += (long long)nHeaderSize; + } + + pInBlock = (unsigned char*)malloc(BLOCK_SIZE); + if (!pInBlock) { + return LZSA_ERROR_MEMORY; + } + + pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2); + if (!pOutData) { + free(pInBlock); + pInBlock = NULL; + + return LZSA_ERROR_MEMORY; + } + + int nDecompressionError = 0; + int nPrevDecompressedSize = 0; + int nNumBlocks = 0; + + while (!pInStream->eof(pInStream) && !nDecompressionError) { + unsigned int nBlockSize = 0; + int nIsUncompressed = 0; + + if (nPrevDecompressedSize != 0) { + memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize); + } + else if (nDictionaryDataSize && pDictionaryData) { + nPrevDecompressedSize = nDictionaryDataSize; + memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, 
pDictionaryData, nPrevDecompressedSize); + } + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + const int nFrameSize = lzsa_get_frame_size(); + + memset(cFrameData, 0, 16); + if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) { + if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) { + nDecompressionError = LZSA_ERROR_FORMAT; + nBlockSize = 0; + } + + nCompressedSize += (long long)nFrameSize; + } + else { + nDecompressionError = LZSA_ERROR_SRC; + nBlockSize = 0; + } + } + else { + if (!nNumBlocks) + nBlockSize = BLOCK_SIZE; + else + nBlockSize = 0; + } + + if (nBlockSize != 0) { + int nDecompressedSize = 0; + + if ((int)nBlockSize > BLOCK_SIZE) { + nDecompressionError = LZSA_ERROR_FORMAT; + break; + } + size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize); + if (nFlags & LZSA_FLAG_RAW_BLOCK) { + if (nReadBytes > 2) + nReadBytes -= 2; + else + nReadBytes = 0; + nBlockSize = (unsigned int)nReadBytes; + } + + if (nReadBytes == nBlockSize) { + nCompressedSize += (long long)nReadBytes; + + if (nIsUncompressed) { + memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize); + nDecompressedSize = nBlockSize; + } + else { + unsigned int nBlockOffs = 0; + + nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); + if (nDecompressedSize < 0) { + nDecompressionError = LZSA_ERROR_DECOMPRESSION; + break; + } + } + + if (nDecompressedSize != 0) { + nOriginalSize += (long long)nDecompressedSize; + + if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize) + nDecompressionError = LZSA_ERROR_DST; + nPrevDecompressedSize = nDecompressedSize; + nDecompressedSize = 0; + } + } + else { + break; + } + + nNumBlocks++; + } + else { + break; + } + } + + free(pOutData); + pOutData = NULL; + + free(pInBlock); + pInBlock = NULL; + + *pOriginalSize = nOriginalSize; + *pCompressedSize = nCompressedSize; + return nDecompressionError; 
+} + diff --git a/src/expand_streaming.h b/src/expand_streaming.h new file mode 100644 index 0000000..9958b82 --- /dev/null +++ b/src/expand_streaming.h @@ -0,0 +1,78 @@ +/* + * expand_streaming.h - streaming decompression definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _EXPAND_STREAMING_H +#define _EXPAND_STREAMING_H + +#include "stream.h" + +/* Forward declaration */ +typedef enum _lzsa_status_t lzsa_status_t; + +/*-------------- File API -------------- */ + +/** + * Decompress file + * + * @param pszInFilename name of input(compressed) file to decompress + * @param pszOutFilename name of output(decompressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize); + +/*-------------- Streaming API -------------- */ + +/** + * Decompress stream + * + * @param pInStream input(compressed) stream to decompress + * @param pOutStream output(decompressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize); + +#endif /* _EXPAND_STREAMING_H */ diff --git a/src/format.h b/src/format.h index 108811c..ace4f15 100755 --- a/src/format.h +++ b/src/format.h @@ -38,6 +38,8 @@ #define MAX_VARLEN 0xffff +#define BLOCK_SIZE 65536 + #define MIN_MATCH_SIZE_V1 3 #define LITERALS_RUN_LEN_V1 7 #define MATCH_RUN_LEN_V1 15 diff --git a/src/frame.c b/src/frame.c index edbe531..f1d6be2 100644 --- a/src/frame.c +++ b/src/frame.c @@ -31,6 +31,7 @@ */ #include +#include #include "frame.h" #define LZSA_ID_0 0x7b diff --git a/src/lib.h b/src/lib.h index 7296204..60cda13 100755 --- a/src/lib.h +++ b/src/lib.h @@ -33,11 +33,19 @@ #ifndef _LIB_H #define _LIB_H -#include "divsufsort.h" #include "stream.h" +#include "dictionary.h" +#include "frame.h" +#include "format.h" +#include "shrink_context.h" +#include "shrink_streaming.h" +#include "shrink_inmem.h" +#include "expand_context.h" +#include "expand_streaming.h" +#include "expand_inmem.h" /** High level status for compression and decompression */ -typedef enum { +typedef enum _lzsa_status_t { LZSA_OK = 0, /**< Success */ LZSA_ERROR_SRC, /**< Error reading input */ LZSA_ERROR_DST, /**< Error reading output */ @@ -58,200 +66,4 @@ typedef enum { #define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */ #define 
LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */ -/*-------------- Top level API -------------- */ - -/** - * Compress file - * - * @param pszInFilename name of input(source) file to compress - * @param pszOutFilename name of output(compressed) file to generate - * @param pszDictionaryFilename name of dictionary file, or NULL for none - * @param nFlags compression flags (LZSA_FLAG_xxx) - * @param nMinMatchSize minimum match size - * @param nFormatVersion version of format to use (1-2) - * @param progress progress function, called after compressing each block, or NULL for none - * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful - * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful - * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful - * - * @return LZSA_OK for success, or an error value from lzsa_status_t - */ -lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, - const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); - -/** - * Decompress file - * - * @param pszInFilename name of input(compressed) file to decompress - * @param pszOutFilename name of output(decompressed) file to generate - * @param pszDictionaryFilename name of dictionary file, or NULL for none - * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) - * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file - * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful - * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful - * - * @return LZSA_OK for success, or an error value from lzsa_status_t - */ -lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion, - long long *pOriginalSize, long long *pCompressedSize); - -/*-------------- Streaming API -------------- */ - -/** - * Load dictionary contents - * - * @param pszDictionaryFilename name of dictionary file, or NULL for none - * @param pDictionaryData pointer to returned dictionary contents, or NULL for none - * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 - * - * @return LZSA_OK for success, or an error value from lzsa_status_t - */ -int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize); - -/** - * Free dictionary contents - * - * @param pDictionaryData pointer to pointer to dictionary contents - */ -void lzsa_dictionary_free(void **ppDictionaryData); - -/** - * Compress stream - * - * @param pInStream input(source) stream to compress - * @param pOutStream output(compressed) stream to write to - * @param pDictionaryData dictionary contents, or NULL for none - * @param nDictionaryDataSize size of dictionary contents, or 0 - * @param nFlags compression flags (LZSA_FLAG_xxx) - * @param nMinMatchSize minimum match size - * @param nFormatVersion version of format to use (1-2) - * @param progress progress function, called after compressing each block, or NULL for none - * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful - * @param pCompressedSize pointer to 
returned output(compressed) size, updated when this function is successful - * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful - * - * @return LZSA_OK for success, or an error value from lzsa_status_t - */ -lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, - const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); - -/** - * Decompress stream - * - * @param pInStream input(compressed) stream to decompress - * @param pOutStream output(decompressed) stream to write to - * @param pDictionaryData dictionary contents, or NULL for none - * @param nDictionaryDataSize size of dictionary contents, or 0 - * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) - * @param nFormatVersion default version of format to use (1-2). 
/**
 * Fill a buffer with pseudo-random data made of literal runs and back-references,
 * for the self-test. The output depends only on the arguments: every byte is either
 * drawn from rand() or copied from a byte generated earlier in the same call.
 *
 * @param pBuffer buffer to fill
 * @param nBufferSize number of bytes to generate
 * @param nMinMatchSize minimum length of a generated match
 * @param nSeed seed passed to srand(), so that a failing case can be reproduced
 * @param nNumLiteralValues number of distinct literal values (values below 1 are treated as 1)
 * @param fMatchProbability probability (0..1) of emitting a match rather than a literal run
 */
static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
   size_t nIndex = 0;
   const int nMatchProbability = (int)(fMatchProbability * 1023.0f);

   /* Guard the modulo below against a zero or negative divisor */
   if (nNumLiteralValues < 1)
      nNumLiteralValues = 1;

   srand(nSeed);

   if (nIndex >= nBufferSize) return;

   /* A match needs something to copy from, so always start with one literal */
   pBuffer[nIndex++] = rand() % nNumLiteralValues;

   while (nIndex < nBufferSize) {
      if ((rand() & 1023) >= nMatchProbability) {
         /* Literal run */
         size_t nLiteralCount = rand() & 127;
         if (nLiteralCount > (nBufferSize - nIndex))
            nLiteralCount = nBufferSize - nIndex;

         while (nLiteralCount--)
            pBuffer[nIndex++] = rand() % nNumLiteralValues;
      }
      else {
         /* Match: copy bytes that were generated earlier */
         size_t nMatchLength = nMinMatchSize + (rand() & 1023);
         size_t nMatchOffset;

         if (nMatchLength > (nBufferSize - nIndex))
            nMatchLength = nBufferSize - nIndex;
         if (nMatchLength > nIndex)
            nMatchLength = nIndex;

         /* The offset must be at least 1. An offset of 0 would copy each byte onto
          * itself and keep whatever the buffer held before this call, making the
          * data depend on more than the seed. */
         if (nMatchLength < nIndex)
            nMatchOffset = 1 + ((size_t)rand() % (nIndex - nMatchLength));
         else
            nMatchOffset = nIndex;

         while (nMatchLength--) {
            pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
            nIndex++;
         }
      }
   }
}

/**
 * Corrupt a buffer by inverting randomly chosen bytes, for the self-test.
 *
 * @param pBuffer buffer to corrupt in place
 * @param nBufferSize number of bytes in the buffer
 * @param nSeed seed passed to srand()
 * @param fXorProbability probability (0..1) that any given byte is inverted
 */
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
   const int nXorProbability = (int)(fXorProbability * 1023.0f);
   size_t nIndex;

   srand(nSeed);

   for (nIndex = 0; nIndex < nBufferSize; nIndex++) {
      if ((rand() & 1023) < nXorProbability)
         pBuffer[nIndex] ^= 0xff;
   }
}
+ + nMaxCompressedDataSize = lzsa_get_max_compressed_size_inmem(4 * BLOCK_SIZE); + pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pCompressedData) { + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pTmpCompressedData) { + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE); + if (!pTmpDecompressedData) { + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE); + return 100; + } + + memset(pGeneratedData, 0, 4 * BLOCK_SIZE); + memset(pCompressedData, 0, nMaxCompressedDataSize); + memset(pTmpCompressedData, 0, nMaxCompressedDataSize); + + /* Test compressing with a too small buffer to do anything, expect to fail cleanly */ + for (i = 0; i < 12; i++) { + generate_compressible_data(pGeneratedData, i, nMinMatchSize, nSeed, 256, 0.5f); + lzsa_compress_inmem(pGeneratedData, pCompressedData, i, i, nFlags, nMinMatchSize, nFormatVersion); + } + + size_t nDataSizeStep = 128; + float fProbabilitySizeStep = 0.0005f; + + for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (4 * BLOCK_SIZE); nGeneratedDataSize += nDataSizeStep) { + float fMatchProbability; + + fprintf(stdout, "size %zd", nGeneratedDataSize); + for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) { + int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 }; + float fXorProbability; + + fputc('.', stdout); + fflush(stdout); + + for (i = 0; i < 12; i++) { + /* 
Generate data to compress */ + generate_compressible_data(pGeneratedData, nGeneratedDataSize, nMinMatchSize, nSeed, nNumLiteralValues[i], fMatchProbability); + + /* Try to compress it, expected to succeed */ + size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize), + nFlags, nMinMatchSize, nFormatVersion); + if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress it, expected to succeed */ + size_t nActualDecompressedSize; + int nDecFormatVersion = 0; + nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion); + if (nActualDecompressedSize == -1) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = 
NULL; + + fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */ + for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) { + memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize); + xor_data(pTmpCompressedData + lzsa_get_header_size() + lzsa_get_frame_size(), nActualCompressedSize - lzsa_get_header_size() - lzsa_get_frame_size() - lzsa_get_frame_size() /* footer */, nSeed, fXorProbability); + lzsa_decompress_inmem(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion); + } + } + + nSeed++; + } + + fputc(10, stdout); + fflush(stdout); + + nDataSizeStep <<= 1; + if (nDataSizeStep > (128 * 4096)) + nDataSizeStep = 128 * 4096; + fProbabilitySizeStep *= 1.25; + if (fProbabilitySizeStep > (0.0005f * 4096)) + fProbabilitySizeStep = 0.0005f * 4096; + } + + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + + free(pTmpCompressedData); + pTmpCompressedData = NULL; + + free(pCompressedData); + pCompressedData = NULL; + + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stdout, "All tests passed.\n"); + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) { + size_t nFileSize, nMaxCompressedSize; + unsigned char *pFileData; + unsigned char *pCompressedData; + int nFlags; + int i; + + nFlags = 0; + if (nOptions & OPT_FAVOR_RATIO) + nFlags |= LZSA_FLAG_FAVOR_RATIO; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + + if 
(pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole original file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + /* Allocate max compressed size */ + + nMaxCompressedSize = lzsa_get_max_compressed_size_inmem(nFileSize); + + pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048); + if (!pCompressedData) { + free(pFileData); + fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize); + return 100; + } + + memset(pCompressedData + 1024, 0, nMaxCompressedSize); + + long long nBestCompTime = -1; + + size_t nActualCompressedSize = 0; + size_t nRightGuardPos = nMaxCompressedSize; + + for (i = 0; i < 5; i++) { + unsigned char nGuard = 0x33 + i; + int j; + + /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */ + memset(pCompressedData, nGuard, 1024); + memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024); + + long long t0 = do_get_time(); + nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion); + long long t1 = do_get_time(); + if (nActualCompressedSize == -1) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "compression error\n"); + return 100; + } + 
+ long long nCurDecTime = t1 - t0; + if (nBestCompTime == -1 || nBestCompTime > nCurDecTime) + nBestCompTime = nCurDecTime; + + /* Check guard bytes before the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024); + return 100; + } + } + + /* Check guard bytes after the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j); + return 100; + } + } + + nRightGuardPos = nActualCompressedSize; + } + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole compressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out); + fclose(f_out); + } + } + + free(pCompressedData); + free(pFileData); + + fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize); + fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { size_t nFileSize, nMaxDecompressedSize; unsigned char *pFileData; unsigned char *pDecompressedData; @@ -389,7 +750,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c if (nOptions & OPT_RAW) nMaxDecompressedSize = 65536; else - nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize); + nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize); if (nMaxDecompressedSize == -1) { free(pFileData); 
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); @@ -413,7 +774,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c if (nOptions & OPT_RAW) nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize); else - nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion); + nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion); long long t1 = do_get_time(); if (nActualDecompressedSize == -1) { free(pDecompressedData); @@ -490,7 +851,15 @@ int main(int argc, char **argv) { else bArgsError = true; } - else if (!strcmp(argv[i], "-bench")) { + else if (!strcmp(argv[i], "-cbench")) { + if (!bCommandDefined) { + bCommandDefined = true; + cCommand = 'B'; + } + else + bArgsError = true; + } + else if (!strcmp(argv[i], "-dbench")) { if (!bCommandDefined) { bCommandDefined = true; cCommand = 'b'; @@ -498,6 +867,14 @@ int main(int argc, char **argv) { else bArgsError = true; } + else if (!strcmp(argv[i], "-test")) { + if (!bCommandDefined) { + bCommandDefined = true; + cCommand = 't'; + } + else + bArgsError = true; + } else if (!strcmp(argv[i], "-D")) { if (!pszDictionaryFilename && (i + 1) < argc) { pszDictionaryFilename = argv[i + 1]; @@ -616,12 +993,18 @@ int main(int argc, char **argv) { } } + if (!bArgsError && cCommand == 't') { + return do_self_test(nOptions, nMinMatchSize, nFormatVersion); + } + if (bArgsError || !pszInFilename || !pszOutFilename) { fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n"); fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] \n", argv[0]); fprintf(stderr, " -c: check resulting stream after compressing\n"); fprintf(stderr, " -d: decompress (default: compress)\n"); - fprintf(stderr, " -bench: 
benchmary in-memory decompression\n"); + fprintf(stderr, " -cbench: benchmark in-memory compression\n"); + fprintf(stderr, " -dbench: benchmark in-memory decompression\n"); + fprintf(stderr, " -test: run automated self-tests\n"); fprintf(stderr, " -v: be verbose\n"); fprintf(stderr, " -f : LZSA compression format (1-2)\n"); fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n"); @@ -643,8 +1026,11 @@ int main(int argc, char **argv) { else if (cCommand == 'd') { return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); } + else if (cCommand == 'B') { + return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion); + } else if (cCommand == 'b') { - return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); + return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); } else { return 100; diff --git a/src/matchfinder.c b/src/matchfinder.c index c421eb6..86881a9 100644 --- a/src/matchfinder.c +++ b/src/matchfinder.c @@ -29,7 +29,6 @@ * */ -#include #include #include #include "matchfinder.h" @@ -45,7 +44,7 @@ * * @return 0 for success, non-zero for failure */ -int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) { +int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) { unsigned int *intervals = pCompressor->intervals; /* Build suffix array from input data */ @@ -170,7 +169,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p * * @return number of matches */ -int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) { +int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) { unsigned int *intervals =
pCompressor->intervals; unsigned int *pos_data = pCompressor->pos_data; unsigned int ref; @@ -248,7 +247,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m * @param nStartOffset current offset in input window (typically 0) * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) */ -void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { +void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { lzsa_match match; int i; @@ -267,7 +266,7 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes */ -void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { +void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT); int i; diff --git a/src/matchfinder.h b/src/matchfinder.h index 27bcc34..09f24bc 100644 --- a/src/matchfinder.h +++ b/src/matchfinder.h @@ -35,7 +35,7 @@ /* Forward declarations */ typedef struct _lzsa_match lzsa_match; -typedef struct _lsza_compressor lsza_compressor; +typedef struct _lzsa_compressor lzsa_compressor; /** * Parse input data, build suffix array and overlaid data structures to speed up match finding @@ -46,7 +46,7 @@ typedef struct _lsza_compressor lsza_compressor; * * @return 0 for success, non-zero for failure */ -int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize); +int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize); /** * Find 
matches at the specified offset in the input window @@ -58,7 +58,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p * * @return number of matches */ -int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches); +int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches); /** * Skip previously compressed bytes @@ -67,7 +67,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m * @param nStartOffset current offset in input window (typically 0) * @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes) */ -void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset); +void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset); /** * Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for @@ -77,6 +77,6 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes */ -void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset); +void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset); #endif /* _MATCHFINDER_H */ diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c new file mode 100644 index 0000000..ad90cd2 --- /dev/null +++ b/src/shrink_block_v1.c @@ -0,0 +1,459 @@ +/* + * shrink_v1.c - LZSA1 block compressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. 
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include <stdlib.h> +#include <string.h> +#include "lib.h" +#include "shrink_block_v1.h" +#include "format.h" + +/** + * Get the number of extra bits required to represent a literals length + * + * @param nLength literals length + * + * @return number of extra bits required + */ +static inline int lzsa_get_literals_varlen_size_v1(const int nLength) { + if (nLength < LITERALS_RUN_LEN_V1) { + return 0; + } + else { + if (nLength < 256) + return 8; + else { + if (nLength < 512) + return 16; + else + return 24; + } + } +} + +/** + * Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes.
+ * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength literals length + */ +static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) { + if (nLength >= LITERALS_RUN_LEN_V1) { + if (nLength < 256) + pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1; + else { + if (nLength < 512) { + pOutData[nOutOffset++] = 250; + pOutData[nOutOffset++] = nLength - 256; + } + else { + pOutData[nOutOffset++] = 249; + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent an encoded match length + * + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1) + * + * @return number of extra bits required + */ +static inline int lzsa_get_match_varlen_size_v1(const int nLength) { + if (nLength < MATCH_RUN_LEN_V1) { + return 0; + } + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 256) + return 8; + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 512) + return 16; + else + return 24; + } + } +} + +/** + * Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. 
+ * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1) + */ +static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) { + if (nLength >= MATCH_RUN_LEN_V1) { + if ((nLength + MIN_MATCH_SIZE_V1) < 256) + pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1; + else { + if ((nLength + MIN_MATCH_SIZE_V1) < 512) { + pOutData[nOutOffset++] = 239; + pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256; + } + else { + pOutData[nOutOffset++] = 238; + pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff; + pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + */ +static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int *cost = (int*)pCompressor->pos_data; /* Reuse */ + int nLastLiteralsOffset; + int nMinMatchSize = pCompressor->min_match_size; + const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; + int i; + + cost[nEndOffset - 1] = 8; + nLastLiteralsOffset = nEndOffset; + + for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) { + int nBestCost, nBestMatchLen, nBestMatchOffset; + + int nLiteralsLen = nLastLiteralsOffset - i; + nBestCost = 8 + cost[i + 1]; + if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) { + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. 
+ * The cost automatically accumulates down the chain. */ + nBestCost += 8; + } + if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nBestCost += MODESWITCH_PENALTY; + nBestMatchLen = 0; + nBestMatchOffset = 0; + + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + int m; + + for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) { + int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16; + + if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) { + int nCurCost; + int nMatchLen = pMatch[m].length; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1); + nCurCost += cost[i + nMatchLen]; + if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = nMatchLen; + nBestMatchOffset = pMatch[m].offset; + } + } + else { + int nMatchLen = pMatch[m].length; + int k, nMatchRunLen; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nMatchRunLen = nMatchLen; + if (nMatchRunLen > MATCH_RUN_LEN_V1) + nMatchRunLen = MATCH_RUN_LEN_V1; + + for (k = nMinMatchSize; k < nMatchRunLen; k++) { + int nCurCost; + + nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */; + nCurCost += cost[i + k]; + if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + } + } + + for (; k <= nMatchLen; k++) { + int nCurCost; + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1); + nCurCost += cost[i + k]; + if 
(pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + } + } + } + } + + if (nBestMatchLen >= MIN_MATCH_SIZE_V1) + nLastLiteralsOffset = i; + + cost[i] = nBestCost; + pMatch->length = nBestMatchLen; + pMatch->offset = nBestMatchOffset; + } +} + +/** + * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without + * impacting the compression ratio + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes) + * + * @return non-zero if the number of tokens was reduced, 0 if it wasn't + */ +static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int i; + int nNumLiterals = 0; + int nDidReduce = 0; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + + if (pMatch->length >= MIN_MATCH_SIZE_V1) { + int nMatchLen = pMatch->length; + int nReduce = 0; + + if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducible command size: 1 token byte + 3 literals length bytes + 2 offset bytes + 3 match length bytes */ { + int nMatchOffset = pMatch->offset; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ?
8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + + if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) { + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) { + /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will + * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is + * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current + * match command by literals, the output size will not increase and it will remove one command. */ + nReduce = 1; + } + } + else { + int nCurIndex = i + nMatchLen; + int nNextNumLiterals = 0; + + do { + nCurIndex++; + nNextNumLiterals++; + } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1); + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) { + /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take + * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. 
*/ + nReduce = 1; + } + } + } + + if (nReduce) { + int j; + + for (j = 0; j < nMatchLen; j++) { + pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0; + } + nNumLiterals += nMatchLen; + i += nMatchLen; + + nDidReduce = 1; + } + else { + if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX && + pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 && + (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) { + /* Join */ + + pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length; + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0; + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1; + continue; + } + + nNumLiterals = 0; + i += nMatchLen; + } + } + else { + nNumLiterals++; + i++; + } + } + + return nDidReduce; +} + +/** + * Emit block of compressed data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { + int i; + int nNumLiterals = 0; + int nInFirstLiteralOffset = 0; + int nOutOffset = 0; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->match + (i << 
MATCHES_PER_OFFSET_SHIFT); + + if (pMatch->length >= MIN_MATCH_SIZE_V1) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen; + int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + + if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) + return -1; + if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET) + return -1; + + pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen; + nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + if (nTokenLongOffset) { + pOutData[nOutOffset++] = (-nMatchOffset) >> 8; + } + nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen); + i += nMatchLen; + + pCompressor->num_commands++; + } + else { + if (nNumLiterals == 0) + nInFirstLiteralOffset = i; + nNumLiterals++; + i++; + } + } + + { + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? 
LITERALS_RUN_LEN_V1 : nNumLiterals; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); + + if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) + return -1; + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) + pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f; + else + pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00; + nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pCompressor->num_commands++; + } + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { + /* Emit EOD marker for raw block */ + + if ((nOutOffset + 4) > nMaxOutDataSize) + return -1; + + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = 238; + pOutData[nOutOffset++] = 0; + pOutData[nOutOffset++] = 0; + } + + return nOutOffset; +} + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { + lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + + int 
nDidReduce; + int nPasses = 0; + do { + nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + nPasses++; + } while (nDidReduce && nPasses < 20); + + return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize); +} diff --git a/src/shrink_block_v1.h b/src/shrink_block_v1.h new file mode 100644 index 0000000..0233abd --- /dev/null +++ b/src/shrink_block_v1.h @@ -0,0 +1,53 @@ +/* + * shrink_v1.h - LZSA1 block compressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _SHRINK_BLOCK_V1_H +#define _SHRINK_BLOCK_V1_H + +/* Forward declarations */ +typedef struct _lzsa_compressor lzsa_compressor; + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); + +#endif /* _SHRINK_BLOCK_V1_H */ diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c new file mode 100644 index 0000000..ca98651 --- /dev/null +++ b/src/shrink_block_v2.c @@ -0,0 +1,727 @@ +/* + * shrink_v2.c - LZSA2 block compressor implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "lib.h" +#include "shrink_block_v2.h" +#include "format.h" + +/** + * Write 4-bit nibble to output (compressed) buffer. Returns the updated write index into the output buffer, or -1 if nOutOffset is already negative or a new nibble byte is needed but does not fit + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles (-1 when no byte is currently open) + * @param nCurFreeNibbles current number of free nibbles in byte (reset to 2 whenever a new byte is opened) + * @param nNibbleValue value to write (0..15); only the low 4 bits are used + */ +static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) { + if (nOutOffset < 0) return -1; /* a negative write index is rejected */ + + if ((*nCurNibbleOffset) == -1) { /* no nibble byte is open: reserve a zeroed one at the write index */ + if (nOutOffset >= nMaxOutDataSize) return -1; + (*nCurNibbleOffset) = nOutOffset; + (*nCurFreeNibbles) = 2; + pOutData[nOutOffset++] = 0; + } + + pOutData[*nCurNibbleOffset] = (pOutData[*nCurNibbleOffset] << 4) | (nNibbleValue & 0x0f); /* the first nibble moves to the high half when the second one is shifted in */ + (*nCurFreeNibbles)--; + if ((*nCurFreeNibbles) == 0) { /* byte is full: the next nibble opens a new byte */ + (*nCurNibbleOffset) = -1; + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent a literals length + * + * @param
nLength literals length + * + * @return number of extra bits required + */ +static inline int lzsa_get_literals_varlen_size_v2(const int nLength) { + if (nLength < LITERALS_RUN_LEN_V2) { + return 0; + } + else { + if (nLength < (LITERALS_RUN_LEN_V2 + 15)) { + return 4; /* one nibble */ + } + else { + if (nLength < 256) + return 4+8; /* one nibble and one byte */ + else { + return 4+24; /* one nibble and three bytes */ + } + } + } +} + +/** + * Write extra literals length nibble and bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes: only the nibble write is checked against nMaxOutDataSize. Returns the updated write index, or -1 if the nibble write fails. + * The nMaxOutDataSize, nCurNibbleOffset and nCurFreeNibbles arguments are handed to lzsa_write_nibble_v2 unchanged. + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength literals length (nothing is written when it is below LITERALS_RUN_LEN_V2) + */ +static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) { + if (nLength >= LITERALS_RUN_LEN_V2) { + if (nLength < (LITERALS_RUN_LEN_V2 + 15)) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2); + } + else { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15); /* nibble value 15: more length data follows */ + if (nOutOffset < 0) return -1; + + if (nLength < 256) + pOutData[nOutOffset++] = nLength - 18; + else { + pOutData[nOutOffset++] = 239; /* marker byte: the length follows as 16 bits, low byte first */ + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Get the number of extra bits required to represent an encoded match length + * + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2) + * + * @return number of extra bits required (0, 4, 12 or 28) + */ +static inline int lzsa_get_match_varlen_size_v2(const int nLength) { + if (nLength < MATCH_RUN_LEN_V2) { + return 0; + } + else { + if (nLength < (MATCH_RUN_LEN_V2 + 15)) + return 4; /* one nibble */ + else { + if ((nLength + MIN_MATCH_SIZE_V2) < 256) + return 4+8; /* one nibble and one byte */ + else { + return 4 + 24; /* one nibble and three bytes */ + } + } + } +} + +/** + * Write extra
encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough + * room to write the bytes. + * + * @param pOutData pointer to output buffer + * @param nOutOffset current write index into output buffer + * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2) + */ +static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) { + if (nLength >= MATCH_RUN_LEN_V2) { /* shorter encoded lengths need no extra data */ + if (nLength < (MATCH_RUN_LEN_V2 + 15)) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2); + } + else { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15); /* nibble value 15: more length data follows */ + if (nOutOffset < 0) return -1; + + if ((nLength + MIN_MATCH_SIZE_V2) < 256) + pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2 - 24; + else { + pOutData[nOutOffset++] = 233; /* marker byte: the actual match length follows as 16 bits, low byte first */ + pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff; + pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff; + } + } + } + + return nOutOffset; +} + +/** + * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes) + */ +static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int *cost = (int*)pCompressor->pos_data; /* Reuse the pos_data buffer as the per-position cost array */ + int *prev_match = (int*)pCompressor->intervals; /* Reuse the intervals buffer: for each position, the start of the nearest chosen match at or after it (nEndOffset if none) */ + lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + lzsa_match *pBestMatch = pCompressor->best_match; /* NOTE(review): does not appear to be referenced again in this function - confirm */ + int nLastLiteralsOffset; + int
nMinMatchSize = pCompressor->min_match_size; + const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; + int i; + + cost[nEndOffset - 1] = 8; + prev_match[nEndOffset - 1] = nEndOffset; + nLastLiteralsOffset = nEndOffset; + + pCompressor->best_match[nEndOffset - 1].length = 0; + pCompressor->best_match[nEndOffset - 1].offset = 0; + + repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1; + repmatch_opt[nEndOffset - 1].incoming_offset = -1; + repmatch_opt[nEndOffset - 1].expected_repmatch = 0; + + for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) { + int nLiteralsCost; + + int nLiteralsLen = nLastLiteralsOffset - i; + nLiteralsCost = 8 + cost[i + 1]; + + /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary. + * The cost automatically accumulates down the chain. */ + if (nLiteralsLen == LITERALS_RUN_LEN_V2) { + nLiteralsCost += 4; + } + else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) { + nLiteralsCost += 8; + } + else if (nLiteralsLen == 256) { + nLiteralsCost += 16; + } + if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2) + nLiteralsCost += MODESWITCH_PENALTY; + + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT); + int m; + + cost[i] = nLiteralsCost; + pCompressor->best_match[i].length = 0; + pCompressor->best_match[i].offset = 0; + + repmatch_opt[i].best_slot_for_incoming = -1; + repmatch_opt[i].incoming_offset = -1; + repmatch_opt[i].expected_repmatch = 0; + + for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) { + int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch; + + nBestCost = nLiteralsCost; + nBestMatchLen = 0; + nBestMatchOffset = 0; + nBestUpdatedSlot = -1; + nBestUpdatedIndex = -1; + nBestExpectedRepMatch = 0; + + if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) { + int nCurCost; + 
int nMatchLen = pMatch[m].length; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + int nCurIndex = prev_match[i + nMatchLen]; + + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2); + nCurCost += cost[i + nMatchLen]; + if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = nMatchLen; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = -1; + nBestUpdatedIndex = -1; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + else { + int nMatchLen = pMatch[m].length; + int k, nMatchRunLen; + + if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) + nMatchLen = nEndOffset - LAST_LITERALS - i; + + nMatchRunLen = nMatchLen; + if (nMatchRunLen > MATCH_RUN_LEN_V2) + nMatchRunLen = MATCH_RUN_LEN_V2; + + for (k = nMinMatchSize; k < nMatchRunLen; k++) { + int nCurCost; + + int nCurIndex = prev_match[i + k]; + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 
12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */; + nCurCost += cost[i + k]; + if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + int nCurUpdatedSlot = -1; + int nCurUpdatedIndex = -1; + + if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) { + int r; + + for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) { + if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) { + int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex]; + + if (nAltCost <= nCurCost) { + nCurUpdatedSlot = r; + nCurUpdatedIndex = nCurIndex; + nCurCost = nAltCost; + nCurExpectedRepMatch = 2; + } + } + } + } + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = nCurUpdatedSlot; + nBestUpdatedIndex = nCurUpdatedIndex; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + + for (; k <= nMatchLen; k++) { + int nCurCost; + + int nCurIndex = prev_match[i + k]; + int nMatchOffsetSize = 0; + int nCurExpectedRepMatch = 1; + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) { + nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 
12 : 16)); + nCurExpectedRepMatch = 0; + } + + nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2); + nCurCost += cost[i + k]; + if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2) + nCurCost += MODESWITCH_PENALTY; + + int nCurUpdatedSlot = -1; + int nCurUpdatedIndex = -1; + + if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) { + int r; + + for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) { + if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) { + int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex]; + + if (nAltCost <= nCurCost) { + nCurUpdatedSlot = r; + nCurUpdatedIndex = nCurIndex; + nCurCost = nAltCost; + nCurExpectedRepMatch = 2; + } + } + } + } + + if (nBestCost > (nCurCost - nFavorRatio)) { + nBestCost = nCurCost; + nBestMatchLen = k; + nBestMatchOffset = pMatch[m].offset; + nBestUpdatedSlot = nCurUpdatedSlot; + nBestUpdatedIndex = nCurUpdatedIndex; + nBestExpectedRepMatch = nCurExpectedRepMatch; + } + } + } + + pSlotCost[m] = nBestCost; + pMatch[m].length = nBestMatchLen; + pMatch[m].offset = nBestMatchOffset; /* not necessary */ + + if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) { + cost[i] = nBestCost; + pCompressor->best_match[i].length = nBestMatchLen; + pCompressor->best_match[i].offset = nBestMatchOffset; + + repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch; + + if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) { + repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot; + repmatch_opt[nBestUpdatedIndex].incoming_offset = i; + } + } + } + for (; m < NMATCHES_PER_OFFSET; m++) { + pSlotCost[m] = 0; + } + + if (pCompressor->best_match[i].length >= 
MIN_MATCH_SIZE_V2) + nLastLiteralsOffset = i; + + prev_match[i] = nLastLiteralsOffset; + } + + int nIncomingOffset = -1; + for (i = nStartOffset; i < nEndOffset; ) { + if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) { + if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming; + int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming; + + pCompressor->best_match[i].length = pMatch->length; + pCompressor->best_match[i].offset = pMatch->offset; + cost[i] = *pSlotCost; + + if (repmatch_opt[i].expected_repmatch == 2) + repmatch_opt[i].expected_repmatch = 1; + } + else { + if (repmatch_opt[i].expected_repmatch == 2) + repmatch_opt[i].expected_repmatch = 0; + } + + nIncomingOffset = i; + i += pCompressor->best_match[i].length; + } + else { + i++; + } + } +} + +/** + * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without + * impacting the compression ratio + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * + * @return non-zero if the number of tokens was reduced, 0 if it wasn't + */ +static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int i; + int nNumLiterals = 0; + int nDidReduce = 0; + int nPreviousMatchOffset = -1; + lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->best_match + i; + + if (pMatch->length >= MIN_MATCH_SIZE_V2) { + int nMatchLen = pMatch->length; + int 
nReduce = 0; + int nCurrentMatchOffset = i; + + if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: */ { + int nMatchOffset = pMatch->offset; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2; + int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16; + + if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) { + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost; + + if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) { + nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */; + } + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) { + /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will + * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is + * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current + * match command by literals, the output size will not increase and it will remove one command. 
*/ + nReduce = 1; + } + } + else { + int nCurIndex = i + nMatchLen; + int nNextNumLiterals = 0; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;; + + do { + nCurIndex++; + nNextNumLiterals++; + } while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2); + + if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 || + pCompressor->best_match[nCurIndex].offset != nMatchOffset) { + nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */; + } + + if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) { + /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take + * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. 
*/ + nReduce = 1; + } + } + } + + if (nReduce) { + int j; + + for (j = 0; j < nMatchLen; j++) { + pCompressor->best_match[i + j].length = 0; + } + nNumLiterals += nMatchLen; + i += nMatchLen; + + nDidReduce = 1; + + if (nPreviousMatchOffset >= 0) { + repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0; + nPreviousMatchOffset = -1; + } + } + else { + if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX && + pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 && + (nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_VARLEN) { + /* Join */ + + pMatch->length += pCompressor->best_match[i + nMatchLen].length; + pCompressor->best_match[i + nMatchLen].offset = 0; + pCompressor->best_match[i + nMatchLen].length = -1; + continue; + } + + nNumLiterals = 0; + i += nMatchLen; + } + + nPreviousMatchOffset = nCurrentMatchOffset; + } + else { + nNumLiterals++; + i++; + } + } + + return nDidReduce; +} + +/** + * Emit block of compressed data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { + int i; + int nNumLiterals = 0; + int nInFirstLiteralOffset = 0; + int nOutOffset = 0; + int nCurNibbleOffset = -1, nCurFreeNibbles = 0; + int nRepMatchOffset = 0; + 
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->best_match + i; + + if (pMatch->length >= MIN_MATCH_SIZE_V2) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2; + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals; + int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen; + int nTokenOffsetMode; + int nOffsetSize; + + if (nMatchOffset == nRepMatchOffset) { + nTokenOffsetMode = 0xe0; + nOffsetSize = 0; + } + else { + if (nMatchOffset <= 32) { + nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1); + nOffsetSize = 4; + } + else if (nMatchOffset <= 512) { + nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3); + nOffsetSize = 8; + } + else if (nMatchOffset <= (8192 + 512)) { + nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7); + nOffsetSize = 12; + } + else { + nTokenOffsetMode = 0xc0; + nOffsetSize = 16; + } + } + + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen); + + if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize) + return -1; + if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET) + return -1; + + pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen; + nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); + if (nOutOffset < 0) return -1; + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) { + nOutOffset = lzsa_write_nibble_v2(pOutData, 
nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f); + if (nOutOffset < 0) return -1; + } + else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) { + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + } + else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) { + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f); + if (nOutOffset < 0) return -1; + pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff; + } + else if (nTokenOffsetMode == 0xc0) { + pOutData[nOutOffset++] = (-nMatchOffset) >> 8; + pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; + } + nRepMatchOffset = nMatchOffset; + + nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen); + if (nOutOffset < 0) return -1; + + i += nMatchLen; + + pCompressor->num_commands++; + } + else { + if (nNumLiterals == 0) + nInFirstLiteralOffset = i; + nNumLiterals++; + i++; + } + } + + { + int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? 
LITERALS_RUN_LEN_V2 : nNumLiterals; + int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3); + + if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize) + return -1; + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) + pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47; + else + pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00; + nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); + if (nOutOffset < 0) return -1; + + if (nNumLiterals != 0) { + memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); + nOutOffset += nNumLiterals; + nNumLiterals = 0; + } + + pCompressor->num_commands++; + } + + if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { + /* Emit EOD marker for raw block */ + + if (nOutOffset >= nMaxOutDataSize) + return -1; + pOutData[nOutOffset++] = 0; /* Match offset */ + + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15); /* Extended match length nibble */ + if (nOutOffset < 0) return -1; + + if ((nOutOffset + 1) > nMaxOutDataSize) + return -1; + + pOutData[nOutOffset++] = 232; /* EOD match length byte */ + } + + if (nCurNibbleOffset != -1) { /* a nibble byte is still half filled: complete it with a zero nibble */ + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0); + if (nOutOffset < 0 || nCurNibbleOffset != -1) + return -1; + } + + return nOutOffset; +} + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none); used as the start offset in the input window + * @param nInDataSize number of input bytes
to compress; the end offset is that many bytes past the start offset + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible (the block writer reports -1 when its output does not fit in nMaxOutDataSize) + */ +int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { + lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + + int nDidReduce; + int nPasses = 0; + do { + nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + nPasses++; + } while (nDidReduce && nPasses < 20); /* stop once a pass removes no command, or after 20 passes */ + + return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize); +} diff --git a/src/shrink_block_v2.h b/src/shrink_block_v2.h new file mode 100644 index 0000000..4a50ea1 --- /dev/null +++ b/src/shrink_block_v2.h @@ -0,0 +1,53 @@ +/* + * shrink_v2.h - LZSA2 block compressor definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3.
This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _SHRINK_BLOCK_V2_H +#define _SHRINK_BLOCK_V2_H + +/* Forward declarations */ +typedef struct _lzsa_compressor lzsa_compressor; + +/** + * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none); the block to compress starts at this offset in the input window + * @param nInDataSize number of input bytes to compress; the block ends that many bytes after its start offset + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); + +#endif /* _SHRINK_BLOCK_V2_H */ diff --git a/src/shrink_context.c b/src/shrink_context.c new file mode 100644 index 0000000..ff1e6d5 --- /dev/null +++ b/src/shrink_context.c @@ -0,0 +1,194 @@ +/* + * shrink_context.c - compression context implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty.
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "shrink_context.h" +#include "shrink_block_v1.h" +#include "shrink_block_v2.h" +#include "format.h" +#include "matchfinder.h" + +/** + * Initialize compression context + * + * @param pCompressor compression context to initialize + * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress) + * @param nMinMatchSize minimum match size (raised to the minimum of the selected format when smaller, lowered to 5 when larger) + * @param nFormatVersion version of the format to compress for (1 or 2) + * @param nFlags compression flags + * @return 0 for success, non-zero for failure (the context is cleaned up before a failure is returned) + */ +int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) { + int nResult; + int nMinMatchSizeForFormat = (nFormatVersion == 1) ?
MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2; + + nResult = divsufsort_init(&pCompressor->divsufsort_context); + pCompressor->intervals = NULL; + pCompressor->pos_data = NULL; + pCompressor->open_intervals = NULL; + pCompressor->match = NULL; + pCompressor->best_match = NULL; + pCompressor->slot_cost = NULL; + pCompressor->repmatch_opt = NULL; + pCompressor->min_match_size = nMinMatchSize; + if (pCompressor->min_match_size < nMinMatchSizeForFormat) + pCompressor->min_match_size = nMinMatchSizeForFormat; + else if (pCompressor->min_match_size > 5) + pCompressor->min_match_size = 5; + pCompressor->format_version = nFormatVersion; + pCompressor->flags = nFlags; + pCompressor->num_commands = 0; + + if (!nResult) { + pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + + if (pCompressor->intervals) { + pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + + if (pCompressor->pos_data) { + pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int)); + + if (pCompressor->open_intervals) { + pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match)); + + if (pCompressor->match) { + if (pCompressor->format_version == 2) { + pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match)); + + if (pCompressor->best_match) { + pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int)); + + if (pCompressor->slot_cost) { + pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt)); + + if (pCompressor->repmatch_opt) + return 0; + } + } + } + else { + return 0; + } + } + } + } + } + } + + lzsa_compressor_destroy(pCompressor); + return 100; +} + +/** + * Clean up compression context and free up any associated resources + * + * @param pCompressor compression context to clean up + */ +void lzsa_compressor_destroy(lzsa_compressor *pCompressor) { + 
divsufsort_destroy(&pCompressor->divsufsort_context); + + if (pCompressor->repmatch_opt) { + free(pCompressor->repmatch_opt); + pCompressor->repmatch_opt = NULL; + } + + if (pCompressor->slot_cost) { + free(pCompressor->slot_cost); + pCompressor->slot_cost = NULL; + } + + if (pCompressor->best_match) { + free(pCompressor->best_match); + pCompressor->best_match = NULL; + } + + if (pCompressor->match) { + free(pCompressor->match); + pCompressor->match = NULL; + } + + if (pCompressor->open_intervals) { + free(pCompressor->open_intervals); + pCompressor->open_intervals = NULL; + } + + if (pCompressor->pos_data) { + free(pCompressor->pos_data); + pCompressor->pos_data = NULL; + } + + if (pCompressor->intervals) { + free(pCompressor->intervals); + pCompressor->intervals = NULL; + } +} + +/** + * Compress one block of data + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none) + * @param nInDataSize number of input bytes to compress + * @param pOutData pointer to output buffer + * @param nMaxOutDataSize maximum size of output buffer, in bytes + * + * @return size of compressed data in output buffer, or -1 if the data is uncompressible + */ +int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { + if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) + return -1; + if (nPreviousBlockSize) { + lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize); + } + lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + + if (pCompressor->format_version == 1) { + return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize); + } + else if 
(pCompressor->format_version == 2) { + return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize); + } + else { + return -1; + } +} + +/** + * Get the number of compression commands issued in compressed data blocks + * + * @return number of commands + */ +int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) { + return pCompressor->num_commands; +} diff --git a/src/shrink_context.h b/src/shrink_context.h new file mode 100644 index 0000000..848e01f --- /dev/null +++ b/src/shrink_context.h @@ -0,0 +1,123 @@ +/* + * shrink_context.h - compression context definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _SHRINK_CONTEXT_H +#define _SHRINK_CONTEXT_H + +#include "divsufsort.h" + +#define LCP_BITS 15 +#define LCP_MAX (1U<<(LCP_BITS - 1)) +#define LCP_SHIFT (32-LCP_BITS) +#define LCP_MASK (((1U< + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "shrink_inmem.h" +#include "shrink_context.h" +#include "frame.h" +#include "format.h" +#include "lib.h" + +/** + * Get maximum compressed size of input(source) data + * + * @param pFileData pointer to input(source) data + * @param nFileSize input(source) size in bytes + * + * @return maximum compressed size + */ +size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) { + return lzsa_get_header_size() + ((nInputSize + (BLOCK_SIZE - 1)) >> 16) * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */; +} + +/** + * Compress memory + * + * @param pInputData pointer to input(source) data to compress + * @param pOutBuffer buffer for compressed data + * @param nInputSize input(source) size in bytes + * @param nMaxOutBufferSize maximum capacity of compression buffer + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * + * @return actual compressed size, or -1 for error + */ +size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) { + lzsa_compressor compressor; + size_t nOriginalSize = 0; + size_t nCompressedSize = 0L; + int nResult; + int nError = 0; + + nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags); + if (nResult != 0) { + return -1; + } + + if ((nFlags & 
LZSA_FLAG_RAW_BLOCK) == 0) { + int nHeaderSize = lzsa_encode_header(pOutBuffer, (int)nMaxOutBufferSize, nFormatVersion); + if (nHeaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + nCompressedSize += nHeaderSize; + } + } + + int nPreviousBlockSize = 0; + int nNumBlocks = 0; + + while (nOriginalSize < nInputSize && !nError) { + int nInDataSize; + + nInDataSize = (int)(nInputSize - nOriginalSize); + if (nInDataSize > BLOCK_SIZE) + nInDataSize = BLOCK_SIZE; + + if (nInDataSize > 0) { + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) { + nError = LZSA_ERROR_RAW_TOOLARGE; + break; + } + + int nOutDataSize; + int nOutDataEnd = (int)(nMaxOutBufferSize - (lzsa_get_frame_size() + nCompressedSize + lzsa_get_frame_size() /* footer */)); + + if (nOutDataEnd > BLOCK_SIZE) + nOutDataEnd = BLOCK_SIZE; + + nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + lzsa_get_frame_size() + nCompressedSize, nOutDataEnd); + if (nOutDataSize >= 0) { + /* Write compressed block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nOutDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + nCompressedSize += nBlockheaderSize; + + nOriginalSize += nInDataSize; + nCompressedSize += nOutDataSize; + } + } + } + else { + /* Write uncompressible, literal block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nError = LZSA_ERROR_RAW_UNCOMPRESSED; + break; + } + + int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nInDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize))) + nError = LZSA_ERROR_DST; + else { + memcpy(pOutBuffer + 
nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize); + + nOriginalSize += nInDataSize; + nCompressedSize += nBlockheaderSize + nInDataSize; + } + } + } + + nPreviousBlockSize = nInDataSize; + nNumBlocks++; + } + } + + if (!nError) { + int nFooterSize; + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nFooterSize = 0; + } + else { + nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize)); + if (nFooterSize < 0) + nError = LZSA_ERROR_COMPRESSION; + } + + nCompressedSize += nFooterSize; + } + + lzsa_compressor_destroy(&compressor); + + if (nError) { + return -1; + } + else { + return nCompressedSize; + } +} + diff --git a/src/shrink_inmem.h b/src/shrink_inmem.h new file mode 100644 index 0000000..d8b9bd3 --- /dev/null +++ b/src/shrink_inmem.h @@ -0,0 +1,64 @@ +/* + * shrink_inmem.h - in-memory compression definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. 
#ifndef _SHRINK_INMEM_H
#define _SHRINK_INMEM_H

#include <stdlib.h>

/**
 * Get maximum compressed size of input(source) data
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);

/**
 * Compress memory
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 (converted to size_t) for error
 */
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
                           const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);

#endif /* _SHRINK_INMEM_H */
+ * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include "shrink_streaming.h" +#include "format.h" +#include "frame.h" +#include "lib.h" + +/*-------------- File API -------------- */ + +/** + * Compress file + * + * @param pszInFilename name of input(source) file to compress + * @param pszOutFilename name of output(compressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) { + lzsa_stream_t inStream, outStream; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { + return LZSA_ERROR_SRC; + } + + if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) { + inStream.close(&inStream); + return LZSA_ERROR_DST; + } + + nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize); + + if (nStatus) { + outStream.close(&outStream); + inStream.close(&inStream); + return 
nStatus; + } + + nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount); + + lzsa_dictionary_free(&pDictionaryData); + outStream.close(&outStream); + inStream.close(&inStream); + return nStatus; +} + +/*-------------- Streaming API -------------- */ + +/** + * Compress stream + * + * @param pInStream input(source) stream to compress + * @param pOutStream output(compressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) { + unsigned char *pInData, *pOutData; + lzsa_compressor compressor; + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + int nResult; + unsigned char cFrameData[16]; + int nError = 0; + + pInData = (unsigned char*)malloc(BLOCK_SIZE * 2); + if (!pInData) { 
+ return LZSA_ERROR_MEMORY; + } + memset(pInData, 0, BLOCK_SIZE * 2); + + pOutData = (unsigned char*)malloc(BLOCK_SIZE); + if (!pOutData) { + free(pInData); + pInData = NULL; + + return LZSA_ERROR_MEMORY; + } + memset(pOutData, 0, BLOCK_SIZE); + + nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags); + if (nResult != 0) { + free(pOutData); + pOutData = NULL; + + free(pInData); + pInData = NULL; + + return LZSA_ERROR_MEMORY; + } + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion); + if (nHeaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize) + nError = LZSA_ERROR_DST; + nCompressedSize += (long long)nHeaderSize; + } + } + + int nPreviousBlockSize = 0; + int nNumBlocks = 0; + + while (!pInStream->eof(pInStream) && !nError) { + int nInDataSize; + + if (nPreviousBlockSize) { + memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize); + } + else if (nDictionaryDataSize && pDictionaryData) { + nPreviousBlockSize = nDictionaryDataSize; + memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize); + } + + nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE); + if (nInDataSize > 0) { + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) { + nError = LZSA_ERROR_RAW_TOOLARGE; + break; + } + nDictionaryDataSize = 0; + + int nOutDataSize; + + nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? 
BLOCK_SIZE : nInDataSize); + if (nOutDataSize >= 0) { + /* Write compressed block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + nCompressedSize += (long long)nBlockheaderSize; + if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) { + nError = LZSA_ERROR_DST; + } + } + } + + if (!nError) { + if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) { + nError = LZSA_ERROR_DST; + } + else { + nOriginalSize += (long long)nInDataSize; + nCompressedSize += (long long)nOutDataSize; + } + } + } + else { + /* Write uncompressible, literal block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nError = LZSA_ERROR_RAW_UNCOMPRESSED; + break; + } + + int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) { + nError = LZSA_ERROR_DST; + } + else { + if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) { + nError = LZSA_ERROR_DST; + } + else { + nOriginalSize += (long long)nInDataSize; + nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize; + } + } + } + } + + nPreviousBlockSize = nInDataSize; + nNumBlocks++; + } + + if (!nError && !pInStream->eof(pInStream)) { + if (progress) + progress(nOriginalSize, nCompressedSize); + } + } + + if (!nError) { + int nFooterSize; + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nFooterSize = 0; + } + else { + nFooterSize = lzsa_encode_footer_frame(cFrameData, 16); + if (nFooterSize < 0) + nError = LZSA_ERROR_COMPRESSION; + } + + if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize) + nError = LZSA_ERROR_DST; + 
nCompressedSize += (long long)nFooterSize; + } + + if (progress) + progress(nOriginalSize, nCompressedSize); + + int nCommandCount = lzsa_compressor_get_command_count(&compressor); + lzsa_compressor_destroy(&compressor); + + free(pOutData); + pOutData = NULL; + + free(pInData); + pInData = NULL; + + if (nError) { + return nError; + } + else { + if (pOriginalSize) + *pOriginalSize = nOriginalSize; + if (pCompressedSize) + *pCompressedSize = nCompressedSize; + if (pCommandCount) + *pCommandCount = nCommandCount; + return LZSA_OK; + } +} diff --git a/src/shrink_streaming.h b/src/shrink_streaming.h new file mode 100644 index 0000000..9acc46d --- /dev/null +++ b/src/shrink_streaming.h @@ -0,0 +1,86 @@ +/* + * shrink_streaming.h - streaming compression definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. 
https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _SHRINK_STREAMING_H +#define _SHRINK_STREAMING_H + +#include "stream.h" + +/* Forward declaration */ +typedef enum _lzsa_status_t lzsa_status_t; + +/*-------------- File API -------------- */ + +/** + * Compress file + * + * @param pszInFilename name of input(source) file to compress + * @param pszOutFilename name of output(compressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); + +/*-------------- Streaming API -------------- */ + +/** + * Compress stream + * + * @param pInStream input(source) stream to compress + * @param pOutStream output(compressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize 
minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); + +#endif /* _SHRINK_STREAMING_H */