Split code, add automated tests, update LZSA2

This commit is contained in:
Emmanuel Marty 2019-06-07 23:15:40 +02:00 committed by GitHub
parent 45cb124c4d
commit b4e3c07d3a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
28 changed files with 4002 additions and 216 deletions

101
src/dictionary.c Normal file
View File

@ -0,0 +1,101 @@
/*
* dictionary.c - dictionary implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include "format.h"
#include "lib.h"
/**
 * Load dictionary contents
 *
 * When a filename is given, a BLOCK_SIZE byte buffer is allocated and filled with
 * the last BLOCK_SIZE bytes of the file (or the whole file if it is smaller).
 * The returned buffer must be released with lzsa_dictionary_free().
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents; receives NULL if no filename was given. Only written on success
 * @param pDictionaryDataSize pointer to returned size of dictionary contents in bytes; receives 0 if no filename was given. Only written on success
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
   unsigned char *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;

   if (pszDictionaryFilename) {
      pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
      if (!pDictionaryData) {
         return LZSA_ERROR_MEMORY;
      }

      FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
      if (!pDictionaryFile) {
         free(pDictionaryData);
         return LZSA_ERROR_DICTIONARY;
      }

      /* If the stream cannot seek to its end, its size is unknown: read from the current (start) position.
       * Once the position has been moved to the end, however, failing to move it back must be an error,
       * otherwise an empty dictionary would silently be returned. */
      if (fseek(pDictionaryFile, 0, SEEK_END) == 0) {
#ifdef _WIN32
         __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
#else
         off_t nDictionaryFileSize = ftello(pDictionaryFile);
#endif
         int nSeekFailed;

         if (nDictionaryFileSize > BLOCK_SIZE) {
            /* Use the last BLOCK_SIZE bytes of the dictionary */
            nSeekFailed = fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
         }
         else {
            nSeekFailed = fseek(pDictionaryFile, 0, SEEK_SET);
         }

         if (nSeekFailed) {
            fclose(pDictionaryFile);
            free(pDictionaryData);
            return LZSA_ERROR_DICTIONARY;
         }
      }

      /* fread() returns a size_t and never a negative value; errors are reported through ferror() */
      size_t nBytesRead = fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);
      if (ferror(pDictionaryFile)) {
         fclose(pDictionaryFile);
         free(pDictionaryData);
         return LZSA_ERROR_DICTIONARY;
      }
      nDictionaryDataSize = (int)nBytesRead;

      fclose(pDictionaryFile);
   }

   *ppDictionaryData = pDictionaryData;
   *pDictionaryDataSize = nDictionaryDataSize;
   return LZSA_OK;
}
/**
 * Free dictionary contents
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents; the pointed-to
 *                         pointer is reset to NULL once the contents have been freed
 */
void lzsa_dictionary_free(void **ppDictionaryData) {
   if (ppDictionaryData && *ppDictionaryData) {
      free(*ppDictionaryData);
      /* Clear the caller's pointer (not the local parameter) so that calling this again cannot double-free */
      *ppDictionaryData = NULL;
   }
}

56
src/dictionary.h Normal file
View File

@ -0,0 +1,56 @@
/*
* dictionary.h - dictionary definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _DICTIONARY_H
#define _DICTIONARY_H
#include <stdlib.h>
/**
 * Load dictionary contents
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
 * @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
/**
 * Free dictionary contents
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents
 */
void lzsa_dictionary_free(void **ppDictionaryData);
#endif /* _DICTIONARY_H */

217
src/expand_block_v1.c Normal file
View File

@ -0,0 +1,217 @@
/*
* expand_block_v1.c - LZSA1 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v1.h"
/* FORCE_INLINE: compiler-specific keyword/attribute requesting that a function always be inlined
 * (__forceinline for MSVC, the always_inline attribute for other compilers) */
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
/**
 * Decode the extra literals length byte(s) that follow an LZSA1 token
 *
 * The first extra byte is added to *nLiterals. A value of 250 means the length is
 * 256 plus one more byte; a value of 249 means the length is stored in the next two
 * bytes, low byte first.
 *
 * @param ppInBlock pointer to current input position, advanced past the consumed bytes on success
 * @param pInBlockEnd pointer to the end of the input data
 * @param nLiterals pointer to literals length, updated in place
 *
 * @return 0 for success, -1 if the input ends before the length is complete
 */
static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   (*nLiterals) += nExtra;

   switch (nExtra) {
      case 250:
         /* One more byte follows, holding (length - 256) */
         if (pCursor >= pInBlockEnd)
            return -1;
         (*nLiterals) = 256 + ((unsigned int)*pCursor++);
         break;

      case 249:
         /* Two more bytes follow, holding the full length, low byte first */
         if ((pCursor + 1) >= pInBlockEnd)
            return -1;
         (*nLiterals) = ((unsigned int)pCursor[0]) | (((unsigned int)pCursor[1]) << 8);
         pCursor += 2;
         break;

      default:
         break;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
 * Decode the extra match length byte(s) that follow an LZSA1 match offset
 *
 * The first extra byte is added to *nMatchLen. A value of 239 means the length is
 * 256 plus one more byte; a value of 238 means the length is stored in the next two
 * bytes, low byte first.
 *
 * @param ppInBlock pointer to current input position, advanced past the consumed bytes on success
 * @param pInBlockEnd pointer to the end of the input data
 * @param nMatchLen pointer to match length, updated in place
 *
 * @return 0 for success, -1 if the input ends before the length is complete
 */
static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   (*nMatchLen) += nExtra;

   switch (nExtra) {
      case 239:
         /* One more byte follows, holding (length - 256) */
         if (pCursor >= pInBlockEnd)
            return -1;
         (*nMatchLen) = 256 + ((unsigned int)*pCursor++);
         break;

      case 238:
         /* Two more bytes follow, holding the full length, low byte first */
         if ((pCursor + 1) >= pInBlockEnd)
            return -1;
         (*nMatchLen) = ((unsigned int)pCursor[0]) | (((unsigned int)pCursor[1]) << 8);
         pCursor += 2;
         break;

      default:
         break;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
 * Decompress one LZSA1 data block
 *
 * Each token byte encodes a literals count (bits 4-6), a match length (bits 0-3) and
 * an offset size flag (bit 7). Literals are copied from the input, then a match is
 * copied from earlier output, until the input is exhausted.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written, counted from pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
/* The write limit is measured from the current write position, not from the start of pOutData */
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
/* The fixed-size fast copies below write up to 18 bytes at once; they are only used while at least that much room remains */
const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
/* Bits 4-6 of the token hold the literals count */
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
/* Fast path for short literal runs: copy a fixed 8 bytes, then advance by the real count (at most 7) */
if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pInBlock, 8);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
/* The maximum count in the token means extra length bytes follow */
if (nLiterals == LITERALS_RUN_LEN_V1) {
if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
return -1;
}
if (nLiterals != 0) {
/* Bounds-checked literal copy: must fit in both the remaining input and the remaining output */
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
unsigned int nMatchOffset;
/* Offset bytes are stored bit-inverted; bit 7 of the token indicates a second (high) offset byte */
nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
if (token & 0x80) {
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
}
nMatchOffset++;
const unsigned char *pSrc = pCurOutData - nMatchOffset;
/* Reject matches that would start before the beginning of the output buffer */
if (pSrc >= pOutData) {
/* Bits 0-3 of the token hold the match length, before MIN_MATCH_SIZE_V1 is added */
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
/* Fast path for short matches: copy a fixed 18 bytes in pieces of at most 8 bytes
 * (each piece cannot overlap its source since the offset is at least 8), then advance by the real length */
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 8);
memcpy(pCurOutData + 16, pSrc + 16, 2);
pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
}
else {
nMatchLen += MIN_MATCH_SIZE_V1;
/* The maximum length in the token means extra length bytes follow */
if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
return -1;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
/* With an offset of at least 16, copy in 16-byte pieces instead; the last piece may write up to
 * 15 bytes past the end of the match, hence the extra margin required before pOutDataFastEnd */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
/* Number of bytes produced by this block only (excludes the nOutDataOffset bytes already present) */
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

49
src/expand_block_v1.h Normal file
View File

@ -0,0 +1,49 @@
/*
* expand_block_v1.h - LZSA1 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_V1_H
#define _EXPAND_V1_H
/**
 * Decompress one LZSA1 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written, counted from pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V1_H */

242
src/expand_block_v2.c Normal file
View File

@ -0,0 +1,242 @@
/*
* expand_block_v2.c - LZSA2 block decompressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "format.h"
#include "expand_block_v2.h"
/* FORCE_INLINE: compiler-specific keyword/attribute requesting that a function always be inlined
 * (__forceinline for MSVC, the always_inline attribute for other compilers) */
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */
/**
 * Read the next 4-bit value from the LZSA2 input
 *
 * Nibbles are stored two per byte, high nibble first. *nCurNibbles is toggled on every
 * call: when it becomes non-zero a new byte is fetched from the input and its high
 * nibble is returned; otherwise the low nibble of the previously fetched byte is returned.
 *
 * @param ppInBlock pointer to current input position, advanced when a new byte is fetched
 * @param pInBlockEnd pointer to the end of the input data
 * @param nCurNibbles pointer to nibble toggle state
 * @param nibbles pointer to the byte holding the current pair of nibbles
 * @param nValue pointer to returned nibble value (0-15)
 *
 * @return 0 for success, non-zero if a new byte is needed and the input is exhausted
 */
static inline FORCE_INLINE unsigned int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
   *nCurNibbles ^= 1;

   if (*nCurNibbles == 0) {
      /* Second nibble of the pair: low half of the byte fetched previously */
      (*nValue) = ((unsigned int)(*nibbles)) & 0x0f;
      return 0;
   }

   /* First nibble of a pair: fetch a fresh byte and take its high half */
   const unsigned char *pCursor = *ppInBlock;
   if (pCursor >= pInBlockEnd)
      return -1;

   (*nibbles) = *pCursor;
   *ppInBlock = pCursor + 1;
   (*nValue) = ((unsigned int)(*nibbles)) >> 4;
   return 0;
}
/**
 * Decode the extra length data of an LZSA2 literals run or match
 *
 * A nibble is read and added to *nLength. If the nibble is 15, one more byte is read
 * and added; if the total is then exactly 257, the length is instead taken from the
 * next two bytes, low byte first.
 *
 * @param ppInBlock pointer to current input position, advanced past the consumed bytes
 * @param pInBlockEnd pointer to the end of the input data
 * @param nCurNibbles pointer to nibble toggle state
 * @param nibbles pointer to the byte holding the current pair of nibbles
 * @param nLength pointer to length, updated in place
 *
 * @return 0 for success, -1 if the input ends before the length is complete
 */
static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
   unsigned int nNibble;

   if (lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nNibble))
      return -1;

   (*nLength) += nNibble;
   if (nNibble != 15)
      return 0;

   /* Nibble is saturated: an extra byte follows */
   const unsigned char *pCursor = *ppInBlock;
   if (pCursor >= pInBlockEnd)
      return -1;

   (*nLength) += ((unsigned int)*pCursor++);
   if ((*nLength) == 257) {
      /* Two more bytes follow, holding the full length, low byte first */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      (*nLength) = ((unsigned int)pCursor[0]) | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
   }

   *ppInBlock = pCursor;
   return 0;
}
/**
 * Decompress one LZSA2 data block
 *
 * Each token byte encodes an offset mode (bits 6-7), an extra offset bit or rep-match
 * selector (bit 5), a literals count (bits 3-4) and a match length (bits 0-2).
 * Literals are copied from the input, then a match is copied from earlier output,
 * until the input is exhausted.
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written, counted from pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
unsigned char *pCurOutData = pOutData + nOutDataOffset;
/* The write limit is measured from the current write position, not from the start of pOutData */
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
/* The fixed-size fast copies below are only used while the write position is more than 20 bytes away from the limit */
const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
/* Nibble reader state: nCurNibbles toggles on each nibble read, nibbles holds the last byte fetched for nibbles.
 * nibbles is only read after a first fetch has stored into it */
int nCurNibbles = 0;
unsigned char nibbles;
/* Kept across tokens so that a rep-match can reuse the previous match offset */
int nMatchOffset = 0;
while (pInBlock < pInBlockEnd) {
const unsigned char token = *pInBlock++;
/* Bits 3-4 of the token hold the literals count */
unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
/* Fast path for short literal runs: copy a fixed 4 bytes, then advance by the real count (at most 3) */
if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pInBlock, 4);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
/* The maximum count in the token means extra length data follows */
if (nLiterals == LITERALS_RUN_LEN_V2) {
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
return -1;
}
if (nLiterals != 0) {
/* Bounds-checked literal copy: must fit in both the remaining input and the remaining output */
if ((pInBlock + nLiterals) <= pInBlockEnd &&
(pCurOutData + nLiterals) <= pOutDataEnd) {
memcpy(pCurOutData, pInBlock, nLiterals);
pInBlock += nLiterals;
pCurOutData += nLiterals;
}
else {
return -1;
}
}
}
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
/* Bits 6-7 of the token select how the match offset is encoded; offsets are stored bit-inverted */
unsigned char nOffsetMode = token & 0xc0;
unsigned int nValue;
switch (nOffsetMode) {
case 0x00:
/* 5 bit offset */
/* Low 4 bits come from a nibble, bit 4 from bit 5 of the token */
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = nValue;
nMatchOffset |= ((token & 0x20) >> 1);
nMatchOffset ^= 0x1f;
nMatchOffset++;
break;
case 0x40:
/* 9 bit offset */
/* Low 8 bits come from a byte, bit 8 from bit 5 of the token */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset ^= 0x1ff;
nMatchOffset++;
break;
case 0x80:
/* 13 bit offset */
/* Bits 8-11 come from a nibble, low 8 bits from a byte, bit 12 from bit 5 of the token;
 * the decoded value is additionally biased by 512 */
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (nValue << 8);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
nMatchOffset ^= 0x1fff;
nMatchOffset += (512 + 1);
break;
default:
/* Check if this is a 16 bit offset or a rep-match */
if ((token & 0x20) == 0) {
/* 16 bit offset */
/* High byte is stored first */
nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
nMatchOffset |= (unsigned int)(*pInBlock++);
nMatchOffset ^= 0xffff;
nMatchOffset++;
}
/* Otherwise (rep-match) nMatchOffset is left unchanged from the previous match */
break;
}
const unsigned char *pSrc = pCurOutData - nMatchOffset;
/* Reject matches that would start before the beginning of the output buffer */
if (pSrc >= pOutData) {
/* Bits 0-2 of the token hold the match length, before MIN_MATCH_SIZE_V2 is added */
unsigned int nMatchLen = (unsigned int)(token & 0x07);
/* Fast path for short matches: copy a fixed 10 bytes in pieces of at most 8 bytes
 * (each piece cannot overlap its source since the offset is at least 8), then advance by the real length */
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
memcpy(pCurOutData, pSrc, 8);
memcpy(pCurOutData + 8, pSrc + 8, 2);
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
}
else {
nMatchLen += MIN_MATCH_SIZE_V2;
/* The maximum length in the token means extra length data follows */
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
return -1;
}
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
/* With an offset of at least 16, copy in 16-byte pieces instead; the last piece may write up to
 * 15 bytes past the end of the match, hence the extra margin required before pOutDataFastEnd */
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
const unsigned char *pCopySrc = pSrc;
unsigned char *pCopyDst = pCurOutData;
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
do {
memcpy(pCopyDst, pCopySrc, 16);
pCopySrc += 16;
pCopyDst += 16;
} while (pCopyDst < pCopyEndDst);
pCurOutData += nMatchLen;
}
else {
while (nMatchLen) {
*pCurOutData++ = *pSrc++;
nMatchLen--;
}
}
}
else {
return -1;
}
}
}
else {
return -1;
}
}
}
/* Number of bytes produced by this block only (excludes the nOutDataOffset bytes already present) */
return (int)(pCurOutData - (pOutData + nOutDataOffset));
}

49
src/expand_block_v2.h Normal file
View File

@ -0,0 +1,49 @@
/*
* expand_block_v2.h - LZSA2 block decompressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_V2_H
#define _EXPAND_V2_H
/**
 * Decompress one LZSA2 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written, counted from pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V2_H */

57
src/expand_context.c Normal file
View File

@ -0,0 +1,57 @@
/*
* expand_context.c - decompressor context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_context.h"
#include "expand_block_v1.h"
#include "expand_block_v2.h"
/**
 * Decompress one data block, dispatching on the format version
 *
 * @param nFormatVersion version of the format the block is compressed with: 1 or 2
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize output size limit, passed through unchanged to the version-specific decompressor
 *
 * @return size of decompressed data in bytes, or -1 for error (including an unknown format version)
 */
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   switch (nFormatVersion) {
      case 1:
         return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);

      case 2:
         return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);

      default:
         /* Unsupported format version */
         return -1;
   }
}

51
src/expand_context.h Normal file
View File

@ -0,0 +1,51 @@
/*
* expand_context.h - decompressor context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_CONTEXT_H
#define _EXPAND_CONTEXT_H
#include <stdlib.h>
/**
 * Decompress one data block
 *
 * @param nFormatVersion version of the format the block is compressed with: 1 or 2
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written, counted from pOutData + nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_CONTEXT_H */

160
src/expand_inmem.c Normal file
View File

@ -0,0 +1,160 @@
/*
* expand_inmem.c - in-memory decompression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_inmem.h"
#include "lib.h"
#include "frame.h"
#define BLOCK_SIZE 65536
/**
 * Get maximum decompressed size of compressed data
 *
 * Walks the header and every frame of the compressed stream without decompressing
 * anything, counting BLOCK_SIZE bytes for each block until a zero-sized block or
 * the end of the data is reached.
 *
 * @param pFileData compressed data
 * @param nFileSize compressed size in bytes
 *
 * @return maximum decompressed size, or -1 (converted to size_t) if the header or a frame is invalid or truncated
 */
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
   const unsigned char *pRead = pFileData;
   const unsigned char *pLimit = pRead + nFileSize;
   size_t nTotalSize = 0;
   int nVersion = 0;
   const int nHeaderSize = lzsa_get_header_size();

   /* The stream must start with a complete, valid header */
   if ((pRead + nHeaderSize) > pLimit)
      return -1;
   if (lzsa_decode_header(pRead, nHeaderSize, &nVersion) != 0)
      return -1;
   pRead += nHeaderSize;

   for (;;) {
      unsigned int nPayloadSize = 0;
      int nStoredUncompressed = 0;

      if (pRead >= pLimit)
         break;

      const int nFrameSize = lzsa_get_frame_size();

      /* Each block is preceded by a frame header giving its stored size */
      if ((pRead + nFrameSize) > pLimit)
         return -1;
      if (lzsa_decode_frame(pRead, nFrameSize, &nPayloadSize, &nStoredUncompressed) != 0)
         return -1;
      pRead += nFrameSize;

      /* A zero-sized block ends the walk */
      if (nPayloadSize == 0)
         break;

      /* Count one potentially full block, then skip over its data */
      nTotalSize += BLOCK_SIZE;
      if ((pRead + nPayloadSize) > pLimit)
         return -1;
      pRead += nPayloadSize;
   }

   return nTotalSize;
}
/**
 * Decompress data in memory
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param pFormatVersion pointer to format version, updated if this function is successful
 *
 * @return actual decompressed size, or -1 (converted to size_t) for error
 */
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) {
   const unsigned char *pCurFileData = pFileData;
   const unsigned char *pEndFileData = pCurFileData + nFileSize;
   unsigned char *pCurOutBuffer = pOutBuffer;
   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
   int nFormatVersion = 0;
   int nPreviousBlockSize = 0;
   const int nHeaderSize = lzsa_get_header_size();

   /* Check header. Sizes are compared as byte counts so that no pointer past the end of the input is ever formed */
   if ((size_t)nHeaderSize > nFileSize ||
      lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
      return -1;

   pCurFileData += nHeaderSize;

   while (pCurFileData < pEndFileData) {
      unsigned int nBlockDataSize = 0;
      int nIsUncompressed = 0;
      const int nFrameSize = lzsa_get_frame_size();

      /* Decode frame header */
      if ((size_t)nFrameSize > (size_t)(pEndFileData - pCurFileData) ||
         lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
         return -1;
      pCurFileData += nFrameSize;

      /* A zero-sized block marks the end of the data */
      if (!nBlockDataSize)
         break;

      /* The block's data must be entirely present in the input, whether compressed or not */
      if ((size_t)nBlockDataSize > (size_t)(pEndFileData - pCurFileData))
         return -1;

      if (!nIsUncompressed) {
         int nDecompressedSize;

         /* Decompress block. The previous block's output is exposed as the match window by starting the
          * output buffer nPreviousBlockSize bytes back. The block decompressors compute their write limit as
          * pOutData + nOutDataOffset + nBlockMaxSize, so the size passed must be only the room left after the
          * current write position; adding nPreviousBlockSize to it would allow writes past pEndOutBuffer. */
         nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer));
         if (nDecompressedSize < 0)
            return -1;

         pCurOutBuffer += nDecompressedSize;
         nPreviousBlockSize = nDecompressedSize;
      }
      else {
         /* Copy uncompressed block */
         if ((size_t)nBlockDataSize > (size_t)(pEndOutBuffer - pCurOutBuffer))
            return -1;
         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
         pCurOutBuffer += nBlockDataSize;
      }

      pCurFileData += nBlockDataSize;
   }

   *pFormatVersion = nFormatVersion;
   /* Return the full size_t difference; going through int would truncate sizes above INT_MAX */
   return (size_t)(pCurOutBuffer - pOutBuffer);
}

61
src/expand_inmem.h Normal file
View File

@ -0,0 +1,61 @@
/*
* expand_inmem.h - in-memory decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_INMEM_H
#define _EXPAND_INMEM_H
#include <stdlib.h>
/**
* Get maximum decompressed size of compressed data
*
* @param pFileData compressed data
* @param nFileSize compressed size in bytes
*
* @return maximum decompressed size
*/
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
/**
* Decompress data in memory
*
* @param pFileData compressed data
* @param pOutBuffer buffer for decompressed data
* @param nFileSize compressed size in bytes
* @param nMaxOutBufferSize maximum capacity of decompression buffer
* @param pFormatVersion pointer to format version, updated if this function is successful
*
* @return actual decompressed size, or -1 for error. The return type is size_t, so the
*         error value must be tested as (size_t)-1 rather than with a "< 0" comparison.
*/
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
#endif /* _EXPAND_INMEM_H */

243
src/expand_streaming.c Normal file
View File

@ -0,0 +1,243 @@
/*
* expand_streaming.c - streaming decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "expand_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
/*-------------- File API -------------- */
/**
 * Decompress file
 *
 * Opens the input and output files as streams, loads the optional dictionary and
 * hands everything to lzsa_decompress_stream(). Both streams are closed before
 * returning, whatever the outcome.
 *
 * @param pszInFilename name of input(compressed) file to decompress
 * @param pszOutFilename name of output(decompressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
 * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
                                   long long *pOriginalSize, long long *pCompressedSize) {
   lzsa_stream_t srcStream, dstStream;
   void *pDictionary = NULL;
   int nDictionarySize = 0;
   lzsa_status_t nResult;

   if (lzsa_filestream_open(&srcStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&dstStream, pszOutFilename, "wb") < 0) {
      srcStream.close(&srcStream);
      return LZSA_ERROR_DST;
   }

   /* Only decompress, and only release the dictionary, when loading it worked */
   nResult = lzsa_dictionary_load(pszDictionaryFilename, &pDictionary, &nDictionarySize);
   if (!nResult) {
      nResult = lzsa_decompress_stream(&srcStream, &dstStream, pDictionary, nDictionarySize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
      lzsa_dictionary_free(&pDictionary);
   }

   /* Streams are closed in reverse order of opening */
   dstStream.close(&dstStream);
   srcStream.close(&srcStream);
   return nResult;
}
/*-------------- Streaming API -------------- */
/**
 * Decompress stream
 *
 * Unless LZSA_FLAG_RAW_BLOCK is set, the stream header is read first and supplies
 * the format version; each block is then preceded by a frame that gives its size
 * and whether it is stored uncompressed. In raw mode a single block of up to
 * BLOCK_SIZE bytes is read and its last two bytes are not passed to the block
 * decompressor.
 *
 * The work buffer is 2 * BLOCK_SIZE bytes: the current block is produced at offset
 * BLOCK_SIZE, and the bytes just before that offset hold the history (the previous
 * block, or the dictionary for the first block).
 *
 * @param pInStream input(compressed) stream to decompress
 * @param pOutStream output(decompressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0. If it exceeds BLOCK_SIZE, only the last BLOCK_SIZE bytes are used
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size; written once the work buffers have been allocated, with the totals reached so far
 * @param pCompressedSize pointer to returned input(compressed) size; written once the work buffers have been allocated, with the totals reached so far
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
                                     long long *pOriginalSize, long long *pCompressedSize) {
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   unsigned char cFrameData[16];
   unsigned char *pInBlock;
   unsigned char *pOutData;
   int nDecompressionError = 0;
   int nPrevDecompressedSize = 0;
   int nNumBlocks = 0;

   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      const int nHeaderSize = lzsa_get_header_size();

      memset(cFrameData, 0, sizeof(cFrameData));
      if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
         return LZSA_ERROR_SRC;
      }

      if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
         return LZSA_ERROR_FORMAT;
      }

      nCompressedSize += (long long)nHeaderSize;
   }

   pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pInBlock) {
      return LZSA_ERROR_MEMORY;
   }

   pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pOutData) {
      free(pInBlock);
      pInBlock = NULL;
      return LZSA_ERROR_MEMORY;
   }

   while (!pInStream->eof(pInStream) && !nDecompressionError) {
      unsigned int nBlockSize = 0;
      int nIsUncompressed = 0;

      if (nPrevDecompressedSize != 0) {
         /* Slide the previous block down so that it ends right before BLOCK_SIZE */
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
      }
      else if (nDictionaryDataSize > 0 && pDictionaryData) {
         /* Seed the history with the dictionary. Only BLOCK_SIZE bytes fit in front
          * of the output position, so keep the tail of an oversized dictionary and
          * ignore a non-positive size instead of writing outside the buffer. */
         const unsigned char *pDictionaryEnd = (const unsigned char*)pDictionaryData + nDictionaryDataSize;

         nPrevDecompressedSize = (nDictionaryDataSize > BLOCK_SIZE) ? BLOCK_SIZE : nDictionaryDataSize;
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryEnd - nPrevDecompressedSize, nPrevDecompressedSize);
      }

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
         const int nFrameSize = lzsa_get_frame_size();

         memset(cFrameData, 0, sizeof(cFrameData));
         if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
            if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
               nDecompressionError = LZSA_ERROR_FORMAT;
               nBlockSize = 0;
            }

            nCompressedSize += (long long)nFrameSize;
         }
         else {
            nDecompressionError = LZSA_ERROR_SRC;
            nBlockSize = 0;
         }
      }
      else {
         /* Raw mode: there is no framing, so read one block and then stop */
         if (!nNumBlocks)
            nBlockSize = BLOCK_SIZE;
         else
            nBlockSize = 0;
      }

      if (nBlockSize != 0) {
         int nDecompressedSize = 0;

         /* Compare as unsigned: casting to int first would let sizes of 2^31 and
          * above turn negative and pass the check. */
         if (nBlockSize > (unsigned int)BLOCK_SIZE) {
            nDecompressionError = LZSA_ERROR_FORMAT;
            break;
         }

         size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
         if (nFlags & LZSA_FLAG_RAW_BLOCK) {
            /* The last two bytes of a raw block are not handed to the block decompressor */
            if (nReadBytes > 2)
               nReadBytes -= 2;
            else
               nReadBytes = 0;
            nBlockSize = (unsigned int)nReadBytes;
         }

         if (nReadBytes == nBlockSize) {
            nCompressedSize += (long long)nReadBytes;

            if (nIsUncompressed) {
               memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
               nDecompressedSize = nBlockSize;
            }
            else {
               nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
               if (nDecompressedSize < 0) {
                  nDecompressionError = LZSA_ERROR_DECOMPRESSION;
                  break;
               }
            }

            if (nDecompressedSize != 0) {
               nOriginalSize += (long long)nDecompressedSize;

               if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
                  nDecompressionError = LZSA_ERROR_DST;
               nPrevDecompressedSize = nDecompressedSize;
               nDecompressedSize = 0;
            }
         }
         else {
            /* Short read: stop without flagging an error, as the original code does */
            break;
         }

         nNumBlocks++;
      }
      else {
         break;
      }
   }

   free(pOutData);
   pOutData = NULL;

   free(pInBlock);
   pInBlock = NULL;

   *pOriginalSize = nOriginalSize;
   *pCompressedSize = nCompressedSize;
   return nDecompressionError;
}

78
src/expand_streaming.h Normal file
View File

@ -0,0 +1,78 @@
/*
* expand_streaming.h - streaming decompression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _EXPAND_STREAMING_H
#define _EXPAND_STREAMING_H
#include "stream.h"
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**
* Decompress file
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Streaming API -------------- */
/**
* Decompress stream
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
#endif /* _EXPAND_STREAMING_H */

View File

@ -38,6 +38,8 @@
#define MAX_VARLEN 0xffff
#define BLOCK_SIZE 65536
#define MIN_MATCH_SIZE_V1 3
#define LITERALS_RUN_LEN_V1 7
#define MATCH_RUN_LEN_V1 15

View File

@ -31,6 +31,7 @@
*/
#include <stdlib.h>
#include <string.h>
#include "frame.h"
#define LZSA_ID_0 0x7b

208
src/lib.h
View File

@ -33,11 +33,19 @@
#ifndef _LIB_H
#define _LIB_H
#include "divsufsort.h"
#include "stream.h"
#include "dictionary.h"
#include "frame.h"
#include "format.h"
#include "shrink_context.h"
#include "shrink_streaming.h"
#include "shrink_inmem.h"
#include "expand_context.h"
#include "expand_streaming.h"
#include "expand_inmem.h"
/** High level status for compression and decompression */
typedef enum {
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error reading input */
LZSA_ERROR_DST, /**< Error reading output */
@ -58,200 +66,4 @@ typedef enum {
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
/*-------------- Top level API -------------- */
/**
* Compress file
*
* @param pszInFilename name of input(source) file to compress
* @param pszOutFilename name of output(compressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
/**
* Decompress file
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Streaming API -------------- */
/**
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
*
* @return LZSA_OK for success, or an error value from lzsa_status_t (declared as int)
*/
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
/**
* Free dictionary contents
*
* @param pDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData);
/**
* Compress stream
*
* @param pInStream input(source) stream to compress
* @param pOutStream output(compressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
/**
* Decompress stream
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Block compression API --------------*/
#define LCP_BITS 15
#define LCP_MAX (1<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1<<LCP_SHIFT) - 1)
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3
#define LEAVE_ALONE_MATCH_SIZE 1000
#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1
#define MODESWITCH_PENALTY 1
/** One match. Both fields are unsigned short, so neither can exceed USHRT_MAX. */
typedef struct _lzsa_match {
unsigned short length; /* presumably the match length in bytes -- TODO confirm */
unsigned short offset; /* presumably the backwards distance to the match source -- TODO confirm */
} lzsa_match;
/** One rep-match slot (for LZSA2) */
/* NOTE(review): the meaning of the individual fields is not visible here; the names
 * suggest bookkeeping for a repeated match offset. Confirm against the LZSA2
 * optimizer before relying on them. */
typedef struct _lzsa_repmatch_opt {
int incoming_offset;
short best_slot_for_incoming;
short expected_repmatch;
} lzsa_repmatch_opt;
/**
* Compression context
*
* Note the spelling: the type is named lsza_compressor (not lzsa_), and the
* lzsa_compressor_* functions take pointers to it under that name.
*/
typedef struct _lsza_compressor {
divsufsort_ctx_t divsufsort_context; /* presumably the suffix sorting state from divsufsort.h -- TODO confirm */
unsigned int *intervals;
unsigned int *pos_data;
unsigned int *open_intervals;
lzsa_match *match;
lzsa_match *best_match;
int *slot_cost;
lzsa_repmatch_opt *repmatch_opt;
int min_match_size; /* presumably the nMinMatchSize given to lzsa_compressor_init -- TODO confirm */
int format_version; /* presumably the nFormatVersion given to lzsa_compressor_init -- TODO confirm */
int flags; /* presumably the nFlags given to lzsa_compressor_init -- TODO confirm */
int num_commands; /* presumably what lzsa_compressor_get_command_count reports -- TODO confirm */
} lsza_compressor;
/**
* Initialize compression context
*
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFlags compression flags
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
/**
* Clean up compression context and free up any associated resources
*
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor);
/**
* Compress one block of data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
* @param nInDataSize number of input bytes to compress
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
* Get the number of compression commands issued in compressed data blocks
*
* @return number of commands
*/
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
/**
* Decompress one data block
*
* @param nFormatVersion version of format to use (1-2)
* @param pInBlock pointer to compressed data
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
*        (NOTE(review): confirm whether this is counted from pOutData or from
*        nOutDataOffset; the wording here does not settle it)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _LIB_H */

View File

@ -41,7 +41,6 @@
#include <sys/time.h>
#endif
#include "lib.h"
#include "inmem.h"
#define OPT_VERBOSE 1
#define OPT_RAW 2
@ -115,7 +114,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
nStartTime = do_get_time();
}
nStatus = lsza_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
if ((nOptions & OPT_VERBOSE)) {
nEndTime = do_get_time();
@ -345,7 +344,369 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con
/*---------------------------------------------------------------------------*/
static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
size_t nIndex = 0;
int nMatchProbability = (int)(fMatchProbability * 1023.0f);
srand(nSeed);
if (nIndex >= nBufferSize) return;
pBuffer[nIndex++] = rand() % nNumLiteralValues;
while (nIndex < nBufferSize) {
if ((rand() & 1023) >= nMatchProbability) {
size_t nLiteralCount = rand() & 127;
if (nLiteralCount > (nBufferSize - nIndex))
nLiteralCount = nBufferSize - nIndex;
while (nLiteralCount--)
pBuffer[nIndex++] = rand() % nNumLiteralValues;
}
else {
size_t nMatchLength = nMinMatchSize + (rand() & 1023);
size_t nMatchOffset;
if (nMatchLength > (nBufferSize - nIndex))
nMatchLength = nBufferSize - nIndex;
if (nMatchLength > nIndex)
nMatchLength = nIndex;
if (nMatchLength < nIndex)
nMatchOffset = rand() % (nIndex - nMatchLength);
else
nMatchOffset = 0;
while (nMatchLength--) {
pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
nIndex++;
}
}
}
}
/**
 * Randomly corrupt a buffer by inverting some of its bytes
 *
 * The generator is seeded with nSeed and one 10-bit value is drawn per byte; the
 * byte is XORed with 0xff when that value is below fXorProbability * 1023.
 *
 * @param pBuffer buffer to corrupt in place
 * @param nBufferSize number of bytes to visit
 * @param nSeed seed passed to srand()
 * @param fXorProbability probability of inverting each byte
 */
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
   const int nFlipThreshold = (int)(fXorProbability * 1023.0f);
   size_t nPos;

   srand(nSeed);

   for (nPos = 0; nPos < nBufferSize; nPos++) {
      if ((rand() & 1023) < nFlipThreshold)
         pBuffer[nPos] ^= 0xff;
   }
}
/**
 * Run the in-memory compression/decompression self-test
 *
 * For a range of data sizes, match probabilities and literal alphabets, this
 * generates data, compresses it, decompresses it and checks that the round trip
 * reproduces the input. It then feeds deliberately corrupted copies of the
 * compressed data to the decompressor; those calls are expected to fail cleanly
 * and their result is not checked.
 *
 * @param nOptions OPT_xxx command line options (OPT_FAVOR_RATIO and OPT_RAW are mapped to compression flags)
 * @param nMinMatchSize minimum match size passed to the compressor and to the data generator
 * @param nFormatVersion version of format to compress with
 *
 * @return 0 if all tests passed, 100 on an allocation failure or a failed test
 */
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
   static const int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
   unsigned char *pGeneratedData = NULL;
   unsigned char *pCompressedData = NULL;
   unsigned char *pTmpCompressedData = NULL;
   unsigned char *pTmpDecompressedData = NULL;
   size_t nGeneratedDataSize;
   size_t nMaxCompressedDataSize;
   size_t nDataSizeStep = 128;
   float fProbabilitySizeStep = 0.0005f;
   unsigned int nSeed = 123;
   int nFlags = 0;
   int nResult = 100;
   int i;

   if (nOptions & OPT_FAVOR_RATIO)
      nFlags |= LZSA_FLAG_FAVOR_RATIO;
   if (nOptions & OPT_RAW)
      nFlags |= LZSA_FLAG_RAW_BLOCK;

   pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
   if (!pGeneratedData) {
      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
      goto cleanup;
   }

   nMaxCompressedDataSize = lzsa_get_max_compressed_size_inmem(4 * BLOCK_SIZE);
   pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
   if (!pCompressedData) {
      fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
      goto cleanup;
   }

   pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
   if (!pTmpCompressedData) {
      fprintf(stderr, "out of memory, %zu bytes needed\n", nMaxCompressedDataSize);
      goto cleanup;
   }

   pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
   if (!pTmpDecompressedData) {
      fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
      goto cleanup;
   }

   memset(pGeneratedData, 0, 4 * BLOCK_SIZE);
   memset(pCompressedData, 0, nMaxCompressedDataSize);
   memset(pTmpCompressedData, 0, nMaxCompressedDataSize);

   /* Test compressing with a too small buffer to do anything, expect to fail cleanly */
   for (i = 0; i < 12; i++) {
      generate_compressible_data(pGeneratedData, i, nMinMatchSize, nSeed, 256, 0.5f);
      lzsa_compress_inmem(pGeneratedData, pCompressedData, i, i, nFlags, nMinMatchSize, nFormatVersion);
   }

   for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (4 * BLOCK_SIZE); nGeneratedDataSize += nDataSizeStep) {
      float fMatchProbability;

      fprintf(stdout, "size %zu", nGeneratedDataSize);
      for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
         float fXorProbability;

         fputc('.', stdout);
         fflush(stdout);

         for (i = 0; i < 12; i++) {
            /* Smallest valid container: header, one frame, and the footer frame */
            const size_t nHeaderSize = (size_t)lzsa_get_header_size();
            const size_t nFrameSize = (size_t)lzsa_get_frame_size();
            size_t nActualCompressedSize;
            size_t nActualDecompressedSize;
            int nDecFormatVersion = 0;

            /* Generate data to compress */
            generate_compressible_data(pGeneratedData, nGeneratedDataSize, nMinMatchSize, nSeed, nNumLiteralValues[i], fMatchProbability);

            /* Try to compress it, expected to succeed */
            nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
               nFlags, nMinMatchSize, nFormatVersion);
            if (nActualCompressedSize == (size_t)-1 || nActualCompressedSize < (nHeaderSize + nFrameSize + nFrameSize /* footer */)) {
               fprintf(stderr, "\nself-test: error compressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
               goto cleanup;
            }

            /* Try to decompress it, expected to succeed */
            nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
            if (nActualDecompressedSize == (size_t)-1) {
               fprintf(stderr, "\nself-test: error decompressing size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
               goto cleanup;
            }

            /* The lengths must agree before the contents are compared, otherwise
             * memcmp would also look at bytes left over from an earlier pass. */
            if (nActualDecompressedSize != nGeneratedDataSize || memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) {
               fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zu, seed %u, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
               goto cleanup;
            }

            /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer.
             * Only the block payload is corrupted; header, first frame and footer stay intact. pGeneratedData is reused as the
             * output buffer here and is regenerated at the top of the next pass. */
            for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) {
               memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize);
               xor_data(pTmpCompressedData + nHeaderSize + nFrameSize, nActualCompressedSize - nHeaderSize - nFrameSize - nFrameSize /* footer */, nSeed, fXorProbability);
               lzsa_decompress_inmem(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
            }
         }

         nSeed++;
      }

      fputc('\n', stdout);
      fflush(stdout);

      /* Coarsen both sweeps as the data grows, up to a fixed ceiling */
      nDataSizeStep <<= 1;
      if (nDataSizeStep > (128 * 4096))
         nDataSizeStep = 128 * 4096;
      fProbabilitySizeStep *= 1.25;
      if (fProbabilitySizeStep > (0.0005f * 4096))
         fProbabilitySizeStep = 0.0005f * 4096;
   }

   fprintf(stdout, "All tests passed.\n");
   nResult = 0;

cleanup:
   /* free(NULL) is a no-op, so this is safe from every exit point */
   free(pTmpDecompressedData);
   free(pTmpCompressedData);
   free(pCompressedData);
   free(pGeneratedData);
   return nResult;
}
/*---------------------------------------------------------------------------*/
/**
 * Benchmark in-memory compression of a file
 *
 * The input file is loaded whole, then compressed 5 times into a buffer that is surrounded by
 * 1024 guard bytes on each side, so that writes outside of the output buffer can be detected.
 * After the first pass, the usable output size is shrunk to the size actually produced by the
 * previous pass, so the following passes also verify that the compressor fits exactly. The best
 * (smallest) elapsed time is reported on stdout.
 *
 * @param pszInFilename name of file to compress
 * @param pszOutFilename name of file to write the compressed data to, or NULL for none
 * @param pszDictionaryFilename must be NULL; dictionaries are not supported by the in-memory benchmark
 * @param nOptions OPT_xxx command-line options (OPT_FAVOR_RATIO and OPT_RAW are honored)
 * @param nMinMatchSize minimum match size, passed through to the compressor
 * @param nFormatVersion LZSA format version, passed through to the compressor
 *
 * @return 0 for success, 100 for failure
 */
static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
   size_t nFileSize, nMaxCompressedSize;
   unsigned char *pFileData;
   unsigned char *pCompressedData;
   long nFileLen;
   int nFlags;
   int nResult = 0;
   int i;

   nFlags = 0;
   if (nOptions & OPT_FAVOR_RATIO)
      nFlags |= LZSA_FLAG_FAVOR_RATIO;
   if (nOptions & OPT_RAW)
      nFlags |= LZSA_FLAG_RAW_BLOCK;

   if (pszDictionaryFilename) {
      fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
      return 100;
   }

   /* Read the whole original file in memory */

   FILE *f_in = fopen(pszInFilename, "rb");
   if (!f_in) {
      fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
      return 100;
   }

   /* Determine the file size; ftell() returns -1 on failure, which must not be turned into a huge size_t */
   if (fseek(f_in, 0, SEEK_END) != 0 || (nFileLen = ftell(f_in)) < 0 || fseek(f_in, 0, SEEK_SET) != 0) {
      fclose(f_in);
      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
      return 100;
   }
   nFileSize = (size_t)nFileLen;

   /* malloc(0) may legitimately return NULL, so always request at least one byte */
   pFileData = (unsigned char*)malloc(nFileSize ? nFileSize : 1);
   if (!pFileData) {
      fclose(f_in);
      fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
      return 100;
   }

   if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
      free(pFileData);
      fclose(f_in);
      fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
      return 100;
   }

   fclose(f_in);

   /* Allocate max compressed size, plus 1024 guard bytes on each side */

   nMaxCompressedSize = lzsa_get_max_compressed_size_inmem(nFileSize);

   pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
   if (!pCompressedData) {
      free(pFileData);
      fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
      return 100;
   }

   memset(pCompressedData + 1024, 0, nMaxCompressedSize);

   long long nBestCompTime = -1;

   size_t nActualCompressedSize = 0;
   size_t nRightGuardPos = nMaxCompressedSize;

   for (i = 0; i < 5; i++) {
      unsigned char nGuard = 0x33 + i;
      int j;

      /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */
      memset(pCompressedData, nGuard, 1024);
      memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024);

      long long t0 = do_get_time();
      nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
      long long t1 = do_get_time();
      if (nActualCompressedSize == (size_t)-1) {
         free(pCompressedData);
         free(pFileData);
         fprintf(stderr, "compression error\n");
         return 100;
      }

      long long nCurCompTime = t1 - t0;
      if (nBestCompTime == -1 || nBestCompTime > nCurCompTime)
         nBestCompTime = nCurCompTime;

      /* Check guard bytes before the output buffer */
      for (j = 0; j < 1024; j++) {
         if (pCompressedData[j] != nGuard) {
            free(pCompressedData);
            free(pFileData);
            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024);
            return 100;
         }
      }

      /* Check guard bytes after the output buffer */
      for (j = 0; j < 1024; j++) {
         if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) {
            free(pCompressedData);
            free(pFileData);
            fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j);
            return 100;
         }
      }

      /* The next pass only gets exactly as much room as this pass used */
      nRightGuardPos = nActualCompressedSize;
   }

   if (pszOutFilename) {
      FILE *f_out;

      /* Write whole compressed file out. Failures are reported, but the timings are still printed below. */

      f_out = fopen(pszOutFilename, "wb");
      if (!f_out) {
         fprintf(stderr, "error opening '%s' for writing\n", pszOutFilename);
         nResult = 100;
      }
      else {
         size_t nWritten = fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out);

         /* fclose() flushes buffered data, so its result matters for a write stream */
         if (fclose(f_out) != 0 || nWritten != nActualCompressedSize) {
            fprintf(stderr, "I/O error while writing '%s'\n", pszOutFilename);
            nResult = 100;
         }
      }
   }

   free(pCompressedData);
   free(pFileData);

   fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
   fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));

   return nResult;
}
/*---------------------------------------------------------------------------*/
static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
size_t nFileSize, nMaxDecompressedSize;
unsigned char *pFileData;
unsigned char *pDecompressedData;
@ -389,7 +750,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
if (nOptions & OPT_RAW)
nMaxDecompressedSize = 65536;
else
nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize);
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
if (nMaxDecompressedSize == -1) {
free(pFileData);
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
@ -413,7 +774,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
if (nOptions & OPT_RAW)
nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
else
nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
long long t1 = do_get_time();
if (nActualDecompressedSize == -1) {
free(pDecompressedData);
@ -490,7 +851,15 @@ int main(int argc, char **argv) {
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-bench")) {
else if (!strcmp(argv[i], "-cbench")) {
if (!bCommandDefined) {
bCommandDefined = true;
cCommand = 'B';
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-dbench")) {
if (!bCommandDefined) {
bCommandDefined = true;
cCommand = 'b';
@ -498,6 +867,14 @@ int main(int argc, char **argv) {
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-test")) {
if (!bCommandDefined) {
bCommandDefined = true;
cCommand = 't';
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-D")) {
if (!pszDictionaryFilename && (i + 1) < argc) {
pszDictionaryFilename = argv[i + 1];
@ -616,12 +993,18 @@ int main(int argc, char **argv) {
}
}
if (!bArgsError && cCommand == 't') {
return do_self_test(nOptions, nMinMatchSize, nFormatVersion);
}
if (bArgsError || !pszInFilename || !pszOutFilename) {
fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
fprintf(stderr, " -c: check resulting stream after compressing\n");
fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -bench: benchmary in-memory decompression\n");
fprintf(stderr, " -cbench: benchmary in-memory compression\n");
fprintf(stderr, " -dbench: benchmary in-memory decompression\n");
fprintf(stderr, " -test: run automated self-tests\n");
fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -f <value>: LZSA compression format (1-2)\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
@ -643,8 +1026,11 @@ int main(int argc, char **argv) {
else if (cCommand == 'd') {
return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
}
else if (cCommand == 'B') {
return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
}
else if (cCommand == 'b') {
return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
}
else {
return 100;

View File

@ -29,7 +29,6 @@
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "matchfinder.h"
@ -45,7 +44,7 @@
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
@ -170,7 +169,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
*
* @return number of matches
*/
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref;
@ -248,7 +247,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match match;
int i;
@ -267,7 +266,7 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
int i;

View File

@ -35,7 +35,7 @@
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lsza_compressor lsza_compressor;
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Parse input data, build suffix array and overlaid data structures to speed up match finding
@ -46,7 +46,7 @@ typedef struct _lsza_compressor lsza_compressor;
*
* @return 0 for success, non-zero for failure
*/
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
/**
* Find matches at the specified offset in the input window
@ -58,7 +58,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
*
* @return number of matches
*/
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
/**
* Skip previously compressed bytes
@ -67,7 +67,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
* @param nStartOffset current offset in input window (typically 0)
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
*/
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
/**
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
@ -77,6 +77,6 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
#endif /* _MATCHFINDER_H */

459
src/shrink_block_v1.c Normal file
View File

@ -0,0 +1,459 @@
/*
* shrink_block_v1.c - LZSA1 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_block_v1.h"
#include "format.h"
/**
 * Compute how many extra bits (beyond the token) are needed to encode a literals length
 *
 * @param nLength literals length
 *
 * @return 0 if the length fits in the token, otherwise 8, 16 or 24 extra bits
 */
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
   /* Short runs are stored entirely in the token */
   if (nLength < LITERALS_RUN_LEN_V1)
      return 0;

   /* One extra byte */
   if (nLength < 256)
      return 8;

   /* Escape byte followed by either one or two length bytes */
   return (nLength < 512) ? 16 : 24;
}
/**
 * Emit the extra literals length bytes, if any, into the output (compressed) buffer. No bounds checking
 * is done here: the caller is responsible for making sure the bytes fit.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength literals length
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   /* Nothing to emit when the length fits in the token */
   if (nLength < LITERALS_RUN_LEN_V1)
      return nOutOffset;

   if (nLength < 256) {
      /* Single byte: length relative to the token's maximum */
      pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
   }
   else if (nLength < 512) {
      /* Escape 250, then length - 256 */
      pOutData[nOutOffset++] = 250;
      pOutData[nOutOffset++] = nLength - 256;
   }
   else {
      /* Escape 249, then 16-bit length, low byte first */
      pOutData[nOutOffset++] = 249;
      pOutData[nOutOffset++] = nLength & 0xff;
      pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
   }

   return nOutOffset;
}
/**
 * Compute how many extra bits (beyond the token) are needed to encode a match length
 *
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return 0 if the length fits in the token, otherwise 8, 16 or 24 extra bits
 */
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
   /* Short matches are stored entirely in the token */
   if (nLength < MATCH_RUN_LEN_V1)
      return 0;

   /* The thresholds apply to the actual (un-biased) match length */
   const int nActualLength = nLength + MIN_MATCH_SIZE_V1;

   if (nActualLength < 256)
      return 8;

   return (nActualLength < 512) ? 16 : 24;
}
/**
 * Emit the extra encoded match length bytes, if any, into the output (compressed) buffer. No bounds checking
 * is done here: the caller is responsible for making sure the bytes fit.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   /* Nothing to emit when the length fits in the token */
   if (nLength < MATCH_RUN_LEN_V1)
      return nOutOffset;

   /* The thresholds apply to the actual (un-biased) match length */
   const int nActualLength = nLength + MIN_MATCH_SIZE_V1;

   if (nActualLength < 256) {
      /* Single byte: encoded length relative to the token's maximum */
      pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
   }
   else if (nActualLength < 512) {
      /* Escape 239, then actual length - 256 */
      pOutData[nOutOffset++] = 239;
      pOutData[nOutOffset++] = nActualLength - 256;
   }
   else {
      /* Escape 238, then 16-bit actual length, low byte first */
      pOutData[nOutOffset++] = 238;
      pOutData[nOutOffset++] = nActualLength & 0xff;
      pOutData[nOutOffset++] = (nActualLength >> 8) & 0xff;
   }

   return nOutOffset;
}
/**
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
*
* Walks the block backwards, from the last byte down to nStartOffset. For every position it computes the cheapest
* cost (counted in bits: 8 per literal byte, 8 per token, 8 or 16 per match offset, plus variable length bytes) of
* encoding everything from that position to the end of the block, either by emitting a literal or by using one of
* the candidate matches at one of its possible lengths. The winning choice overwrites the first match slot of the
* position (length 0 meaning "literal").
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/
static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int *cost = (int*)pCompressor->pos_data; /* Reuse the pos_data storage as the per-position cost table */
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
/* When favoring ratio, a match is also chosen when its cost merely ties the best cost so far */
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
/* The last byte of the block is costed as a single literal */
cost[nEndOffset - 1] = 8;
/* Offset just past the current run of literals (the position of the nearest chosen match after i, or the end) */
nLastLiteralsOffset = nEndOffset;
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nBestCost, nBestMatchLen, nBestMatchOffset;
int nLiteralsLen = nLastLiteralsOffset - i;
/* Baseline: encode this byte as a literal, then whatever is cheapest from the next position */
nBestCost = 8 + cost[i + 1];
if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
nBestCost += 8;
}
/* Penalize a literal that is immediately followed by a match */
if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nBestCost += MODESWITCH_PENALTY;
nBestMatchLen = 0;
nBestMatchOffset = 0;
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
/* Evaluate every candidate match stored for this position; the scan stops at the first one that is too short */
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
/* Offsets up to 256 take one byte, larger ones take two */
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
/* Long match: only its full (clamped) length is considered, shorter lengths are not tried */
int nCurCost;
int nMatchLen = pMatch[m].length;
/* Do not let the match run into the last LAST_LITERALS bytes of the block */
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + nMatchLen];
/* Penalize a match that is immediately followed by another match */
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
}
}
else {
/* Shorter match: try every length from the minimum up to the (clamped) match length */
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
/* Do not let the match run into the last LAST_LITERALS bytes of the block */
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V1)
nMatchRunLen = MATCH_RUN_LEN_V1;
/* First the lengths costed without any extra match length bytes */
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
/* Then the remaining lengths, for which the extra match length bytes are computed */
for (; k <= nMatchLen; k++) {
int nCurCost;
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
nCurCost += cost[i + k];
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
}
}
}
}
/* A match was chosen here, so the run of literals before it ends at this position */
if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
nLastLiteralsOffset = i;
/* Record the result; the first match slot now holds the final decision for this position */
cost[i] = nBestCost;
pMatch->length = nBestMatchLen;
pMatch->offset = nBestMatchOffset;
}
}
/**
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
* impacting the compression ratio
*
* Two transformations are applied while walking the chosen matches in forward order:
* - a short match (9 bytes or less) is turned back into literals when doing so does not cost more bits than the match command
* - a long match is joined with the match that directly follows it when both use the same, small offset
*
* @param pCompressor compression context
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
*/
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
/* Number of literals accumulated since the last match that was kept */
int nNumLiterals = 0;
int nDidReduce = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchLen = pMatch->length;
int nReduce = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducible command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
/* Size in bits of this match command, not counting the literal bytes themselves */
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
}
else {
/* Count the literals that follow this match, up to the next match or the end of the block */
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
/* Turn the match into literals: clear the chosen length on every position it covered */
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
}
else {
/* Candidate for joining: a match of at least LCP_MAX bytes, directly followed by an entry with the same offset (32 or less),
* where the match length is a multiple of the offset and the combined length does not exceed MAX_VARLEN */
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
/* Join: extend this match by the next one's length and invalidate the absorbed entry (offset 0, length -1) */
pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
/* i is not advanced: the same position is examined again with its new, longer length */
continue;
}
/* The match is kept as is; the literals before it belong to its command */
nNumLiterals = 0;
i += nMatchLen;
}
}
else {
/* No match chosen at this position: it is a literal */
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
* Emit block of compressed data
*
* Walks the final match choices in forward order. Positions without a match are accumulated as pending literals;
* each match emits one command made of a token, the optional extra literals length bytes, the pending literals,
* the match offset and the optional extra match length bytes. A last, literals-only command is always emitted for
* the trailing literals, followed by an end of data marker when LZSA_FLAG_RAW_BLOCK is set.
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data does not fit in nMaxOutDataSize bytes
*         or a match offset is outside of MIN_OFFSET..MAX_OFFSET
*/
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int i;
/* Literals pending since the last emitted command, and the input offset of the first one */
int nNumLiterals = 0;
int nInFirstLiteralOffset = 0;
int nOutOffset = 0;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
/* Token fields saturate at their maximum; the remainder goes into the extra length bytes */
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
/* 0x80 in the token flags a two-byte offset */
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
/* Total size of the command in bits, including the literal bytes */
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
/* Make sure the whole command fits before writing any of it */
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
return -1;
/* Token: long offset flag, literals length shifted left by 4, match length in the low bits */
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
/* The offset is stored negated, low byte first */
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
if (nTokenLongOffset) {
/* NOTE(review): this right-shifts a negative int (assuming MIN_OFFSET is positive), which is implementation-defined in C; only the low 8 bits are kept by the store */
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
}
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
i += nMatchLen;
pCompressor->num_commands++;
}
else {
/* Literal: remember where the run starts and keep counting */
if (nNumLiterals == 0)
nInFirstLiteralOffset = i;
nNumLiterals++;
i++;
}
}
{
/* Final command: carries the trailing literals (possibly none) and no match */
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
/* For raw blocks the match length field is set to 0x0f, so that the marker bytes emitted below are read as this command's match part */
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
nOutOffset += nNumLiterals;
nNumLiterals = 0;
}
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block: a zero offset byte, then 238 (the same value lzsa_write_match_varlen_v1 uses
* as its 16-bit length escape) followed by a 16-bit value of 0 */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}
/**
 * Pick the best matches for a block, reduce the number of commands where that is free, and then emit the block as
 * compressed LZSA1 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
 * @param nInDataSize number of bytes to compress, located after the previously compressed bytes
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the block could not be emitted
 */
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nBlockStart = nPreviousBlockSize;
   const int nBlockEnd = nPreviousBlockSize + nInDataSize;
   int nPass;

   lzsa_optimize_matches_v1(pCompressor, nBlockStart, nBlockEnd);

   /* Keep reducing the command count until a pass changes nothing, for at most 20 passes */
   for (nPass = 0; nPass < 20; nPass++) {
      if (!lzsa_optimize_command_count_v1(pCompressor, nBlockStart, nBlockEnd))
         break;
   }

   return lzsa_write_block_v1(pCompressor, pInWindow, nBlockStart, nBlockEnd, pOutData, nMaxOutDataSize);
}

53
src/shrink_block_v1.h Normal file
View File

@ -0,0 +1,53 @@
/*
* shrink_block_v1.h - LZSA1 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V1_H
#define _SHRINK_BLOCK_V1_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
/**
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nPreviousBlockSize number of previously compressed bytes at the start of the input window
* @param nInDataSize number of bytes to compress, located after the previously compressed bytes
* @param pOutData pointer to output buffer
* @param nMaxOutDataSize maximum size of output buffer, in bytes
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V1_H */

727
src/shrink_block_v2.c Normal file
View File

@ -0,0 +1,727 @@
/*
* shrink_block_v2.c - LZSA2 block compressor implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "lib.h"
#include "shrink_block_v2.h"
#include "format.h"
/**
 * Append a 4-bit nibble to the output (compressed) buffer. Nibbles are packed two per byte: when no byte is
 * currently being filled, a new one is claimed at the write index; the first nibble written to a byte ends up in
 * its high half once the second one has been added.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer; a negative index is passed through as an error
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles, or -1 for none
 * @param nCurFreeNibbles current number of free nibbles in byte
 * @param nNibbleValue value to write (0..15)
 *
 * @return updated write index into output buffer, or -1 if nOutOffset is negative or the buffer is full
 */
static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) {
   int nByteIndex;

   /* Propagate an error from a previous write */
   if (nOutOffset < 0)
      return -1;

   nByteIndex = *nCurNibbleOffset;
   if (nByteIndex == -1) {
      /* No partially filled byte: claim a fresh, zeroed one at the write index */
      if (nOutOffset >= nMaxOutDataSize)
         return -1;

      nByteIndex = nOutOffset++;
      pOutData[nByteIndex] = 0;
      *nCurNibbleOffset = nByteIndex;
      *nCurFreeNibbles = 2;
   }

   /* Shift the previous contents up and drop the new nibble into the low half */
   pOutData[nByteIndex] = (unsigned char)((pOutData[nByteIndex] << 4) | (nNibbleValue & 0x0f));

   /* Once both halves are used, the byte is complete */
   *nCurFreeNibbles -= 1;
   if (*nCurFreeNibbles == 0)
      *nCurNibbleOffset = -1;

   return nOutOffset;
}
/**
 * Get the number of extra bits required to represent a literals length, beyond what the token itself holds
 *
 * @param nLength literals length
 *
 * @return number of extra bits required: 0 when the length fits in the token, 4 for one extra nibble,
 *         4+8 for a nibble and a byte, 4+24 for a nibble and three bytes
 */
static inline int lzsa_get_literals_varlen_size_v2(const int nLength) {
   if (nLength < LITERALS_RUN_LEN_V2)
      return 0;

   if (nLength < (LITERALS_RUN_LEN_V2 + 15))
      return 4;

   return (nLength < 256) ? (4 + 8) : (4 + 24);
}
/**
 * Write extra literals length nibble and bytes to output (compressed) buffer. The caller must first check that there
 * is enough room to write the bytes; only the nibble writes are bounds-checked here.
 *
 * Nothing is written when nLength is below LITERALS_RUN_LEN_V2. Lengths below LITERALS_RUN_LEN_V2 + 15 take a single
 * nibble. Longer lengths take a nibble of 15 followed by either one byte (nLength - 18) when nLength is below 256, or
 * the byte 239 and then the length as 16 bits, low byte first.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles
 * @param nCurFreeNibbles current number of free nibbles in byte
 * @param nLength literals length
 *
 * @return updated write index into output buffer, or -1 if a nibble could not be written
 */
static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
   /* Short runs are fully described by the token */
   if (nLength < LITERALS_RUN_LEN_V2)
      return nOutOffset;

   /* One extra nibble is enough */
   if (nLength < (LITERALS_RUN_LEN_V2 + 15))
      return lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2);

   /* Escape nibble, followed by one or three bytes */
   nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
   if (nOutOffset < 0)
      return -1;

   if (nLength < 256) {
      pOutData[nOutOffset++] = nLength - 18;
   }
   else {
      pOutData[nOutOffset++] = 239;
      pOutData[nOutOffset++] = nLength & 0xff;
      pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
   }

   return nOutOffset;
}
/**
 * Get the number of extra bits required to represent an encoded match length, beyond what the token itself holds
 *
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
 *
 * @return number of extra bits required: 0 when the length fits in the token, 4 for one extra nibble,
 *         4+8 for a nibble and a byte, 4+24 for a nibble and three bytes
 */
static inline int lzsa_get_match_varlen_size_v2(const int nLength) {
   if (nLength < MATCH_RUN_LEN_V2)
      return 0;

   if (nLength < (MATCH_RUN_LEN_V2 + 15))
      return 4;

   /* The byte-sized forms are selected on the actual match length, not the encoded one */
   return ((nLength + MIN_MATCH_SIZE_V2) < 256) ? (4 + 8) : (4 + 24);
}
/**
 * Write extra encoded match length nibble and bytes to output (compressed) buffer. The caller must first check that
 * there is enough room to write the bytes; only the nibble writes are bounds-checked here.
 *
 * Nothing is written when nLength is below MATCH_RUN_LEN_V2. Lengths below MATCH_RUN_LEN_V2 + 15 take a single nibble.
 * Longer lengths take a nibble of 15 followed by either one byte (actual match length - 24) when the actual match
 * length is below 256, or the byte 233 and then the actual match length as 16 bits, low byte first.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles
 * @param nCurFreeNibbles current number of free nibbles in byte
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
 *
 * @return updated write index into output buffer, or -1 if a nibble could not be written
 */
static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
   int nActualMatchLen;

   /* Short matches are fully described by the token */
   if (nLength < MATCH_RUN_LEN_V2)
      return nOutOffset;

   /* One extra nibble is enough */
   if (nLength < (MATCH_RUN_LEN_V2 + 15))
      return lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2);

   /* Escape nibble, followed by one or three bytes */
   nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
   if (nOutOffset < 0)
      return -1;

   nActualMatchLen = nLength + MIN_MATCH_SIZE_V2;
   if (nActualMatchLen < 256) {
      pOutData[nOutOffset++] = nActualMatchLen - 24;
   }
   else {
      pOutData[nOutOffset++] = 233;
      pOutData[nOutOffset++] = nActualMatchLen & 0xff;
      pOutData[nOutOffset++] = (nActualMatchLen >> 8) & 0xff;
   }

   return nOutOffset;
}
/**
 * Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input.
 *
 * A first pass walks the window backwards, from nEndOffset - 2 down to nStartOffset. For every position it estimates
 * the cost of emitting a literal there, then evaluates each candidate match (and, for matches shorter than
 * LEAVE_ALONE_MATCH_SIZE, every usable length of it) and records the cheapest choice in best_match[]. Offset bits are
 * not charged when the next chosen match downstream uses the same offset. A second, forward pass then walks the
 * chosen matches and applies or cancels the tentative rep-match substitutions recorded during the first pass.
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 */
static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
/* cost[i]: estimated cost (bits plus heuristic penalties) of encoding everything from position i onwards */
int *cost = (int*)pCompressor->pos_data; /* Reuse */
/* prev_match[i]: position of the nearest chosen match at or after i, or nEndOffset when there is none */
int *prev_match = (int*)pCompressor->intervals; /* Reuse */
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
/* NOTE(review): pBestMatch is never read below; the code accesses pCompressor->best_match directly */
lzsa_match *pBestMatch = pCompressor->best_match;
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
/* When favoring ratio, a candidate whose cost ties the current best is also accepted */
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
/* Seed the tables: the last position of the window is always a single literal (8 bits) */
cost[nEndOffset - 1] = 8;
prev_match[nEndOffset - 1] = nEndOffset;
nLastLiteralsOffset = nEndOffset;
pCompressor->best_match[nEndOffset - 1].length = 0;
pCompressor->best_match[nEndOffset - 1].offset = 0;
repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
repmatch_opt[nEndOffset - 1].incoming_offset = -1;
repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
/* Backward pass over every remaining position of the block */
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
int nLiteralsCost;
/* Number of literals in the run if position i is encoded as a literal */
int nLiteralsLen = nLastLiteralsOffset - i;
nLiteralsCost = 8 + cost[i + 1];
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
* The cost automatically accumulates down the chain. */
if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
nLiteralsCost += 4;
}
else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
nLiteralsCost += 8;
}
else if (nLiteralsLen == 256) {
nLiteralsCost += 16;
}
/* Penalize a literal that is immediately followed by a match */
if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
nLiteralsCost += MODESWITCH_PENALTY;
/* Candidate matches and per-candidate costs for position i */
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
int m;
/* Default choice for this position: a literal */
cost[i] = nLiteralsCost;
pCompressor->best_match[i].length = 0;
pCompressor->best_match[i].offset = 0;
repmatch_opt[i].best_slot_for_incoming = -1;
repmatch_opt[i].incoming_offset = -1;
repmatch_opt[i].expected_repmatch = 0;
/* Evaluate candidates until the slots run out or a candidate is shorter than the minimum match size */
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
nBestCost = nLiteralsCost;
nBestMatchLen = 0;
nBestMatchOffset = 0;
nBestUpdatedSlot = -1;
nBestUpdatedIndex = -1;
nBestExpectedRepMatch = 0;
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
/* Very long match: only its full length (clipped to leave the last literals) is considered */
int nCurCost;
int nMatchLen = pMatch[m].length;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
/* Position of the next chosen match after this candidate */
int nCurIndex = prev_match[i + nMatchLen];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
/* Charge the offset bits unless the next chosen match uses the same offset */
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
nCurCost += cost[i + nMatchLen];
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = -1;
nBestUpdatedIndex = -1;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
else {
/* Regular match: try every length from the minimum match size up to the (clipped) full length */
int nMatchLen = pMatch[m].length;
int k, nMatchRunLen;
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
nMatchLen = nEndOffset - LAST_LITERALS - i;
nMatchRunLen = nMatchLen;
if (nMatchRunLen > MATCH_RUN_LEN_V2)
nMatchRunLen = MATCH_RUN_LEN_V2;
/* First range of lengths: costed without any extra match length bits */
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
int nCurCost;
int nCurIndex = prev_match[i + k];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
nCurCost += cost[i + k];
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
int nCurUpdatedSlot = -1;
int nCurUpdatedIndex = -1;
/* The offsets differ: check whether the next match position has a candidate with this offset that could
* be substituted there at no extra cost, which would drop the offset bits charged here */
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
int r;
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
if (nAltCost <= nCurCost) {
nCurUpdatedSlot = r;
nCurUpdatedIndex = nCurIndex;
nCurCost = nAltCost;
/* 2 marks a tentative rep-match, resolved by the forward pass below */
nCurExpectedRepMatch = 2;
}
}
}
}
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = nCurUpdatedSlot;
nBestUpdatedIndex = nCurUpdatedIndex;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
/* Remaining lengths: same evaluation, plus the cost of the variable-length match length encoding */
for (; k <= nMatchLen; k++) {
int nCurCost;
int nCurIndex = prev_match[i + k];
int nMatchOffsetSize = 0;
int nCurExpectedRepMatch = 1;
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
nCurExpectedRepMatch = 0;
}
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
nCurCost += cost[i + k];
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
nCurCost += MODESWITCH_PENALTY;
int nCurUpdatedSlot = -1;
int nCurUpdatedIndex = -1;
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
int r;
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
if (nAltCost <= nCurCost) {
nCurUpdatedSlot = r;
nCurUpdatedIndex = nCurIndex;
nCurCost = nAltCost;
nCurExpectedRepMatch = 2;
}
}
}
}
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
nBestUpdatedSlot = nCurUpdatedSlot;
nBestUpdatedIndex = nCurUpdatedIndex;
nBestExpectedRepMatch = nCurExpectedRepMatch;
}
}
}
/* Remember the best outcome for this candidate slot; the slot itself is overwritten with the chosen length */
pSlotCost[m] = nBestCost;
pMatch[m].length = nBestMatchLen;
pMatch[m].offset = nBestMatchOffset; /* not necessary */
/* Adopt this candidate for position i if it is the first one, or a match that is no worse than the current choice */
if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
cost[i] = nBestCost;
pCompressor->best_match[i].length = nBestMatchLen;
pCompressor->best_match[i].offset = nBestMatchOffset;
repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
/* Register the substitution requested at the next match position, tagged with the requesting position */
if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
}
}
}
/* Clear the costs of the candidate slots that were not evaluated */
for (; m < NMATCHES_PER_OFFSET; m++) {
pSlotCost[m] = 0;
}
/* Track the nearest chosen match for the positions that precede this one */
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
nLastLiteralsOffset = i;
prev_match[i] = nLastLiteralsOffset;
}
/* Forward pass: follow the chain of chosen matches and resolve the tentative rep-match substitutions */
int nIncomingOffset = -1;
for (i = nStartOffset; i < nEndOffset; ) {
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
/* The previous match in the chain is the one that asked for a substitution here: apply it */
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
pCompressor->best_match[i].length = pMatch->length;
pCompressor->best_match[i].offset = pMatch->offset;
cost[i] = *pSlotCost;
if (repmatch_opt[i].expected_repmatch == 2)
repmatch_opt[i].expected_repmatch = 1;
}
else {
/* No substitution applies: a tentative flag on this match is cancelled */
if (repmatch_opt[i].expected_repmatch == 2)
repmatch_opt[i].expected_repmatch = 0;
}
nIncomingOffset = i;
i += pCompressor->best_match[i].length;
}
else {
i++;
}
}
}
/**
 * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
 * impacting the compression ratio.
 *
 * Walks the chosen matches forwards. Short matches (9 bytes or less) whose command is estimated to cost at least as much
 * as emitting the same bytes as literals are turned into literals. Adjacent matches that share a small offset may also be
 * joined into a single longer match.
 *
 * @param pCompressor compression context
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return non-zero if the number of tokens was reduced, 0 if it wasn't
 */
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
int i;
/* Number of literals accumulated since the last match that was kept */
int nNumLiterals = 0;
int nDidReduce = 0;
int nPreviousMatchOffset = -1;
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->best_match + i;
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
int nMatchLen = pMatch->length;
int nReduce = 0;
int nCurrentMatchOffset = i;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
/* 16 bits are deducted from this command's estimated size when the previous match is flagged as an expected rep-match */
int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16;
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
/* The offset bits are only counted when the following match uses a different offset */
if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
* match command by literals, the output size will not increase and it will remove one command. */
nReduce = 1;
}
}
else {
int nCurIndex = i + nMatchLen;
int nNextNumLiterals = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;;
/* Count the literals that follow this match, up to the next match or the end of the data */
do {
nCurIndex++;
nNextNumLiterals++;
} while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2);
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
pCompressor->best_match[nCurIndex].offset != nMatchOffset) {
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
}
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
nReduce = 1;
}
}
}
if (nReduce) {
/* Turn every position covered by the match into a literal */
int j;
for (j = 0; j < nMatchLen; j++) {
pCompressor->best_match[i + j].length = 0;
}
nNumLiterals += nMatchLen;
i += nMatchLen;
nDidReduce = 1;
/* The previous match can no longer expect this one as a rep-match */
if (nPreviousMatchOffset >= 0) {
repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
/* NOTE(review): this reset is overwritten by the assignment after the if/else below - confirm intent */
nPreviousMatchOffset = -1;
}
}
else {
/* Join this match with the next one when this one is at least LCP_MAX long, both use the same offset of 32 or less,
* this match's length is a multiple of that offset, and the combined length does not exceed MAX_VARLEN */
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
(nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_VARLEN) {
/* Join */
pMatch->length += pCompressor->best_match[i + nMatchLen].length;
pCompressor->best_match[i + nMatchLen].offset = 0;
pCompressor->best_match[i + nMatchLen].length = -1;
/* Re-examine the same position with its new, longer length */
continue;
}
nNumLiterals = 0;
i += nMatchLen;
}
nPreviousMatchOffset = nCurrentMatchOffset;
}
else {
nNumLiterals++;
i++;
}
}
return nDidReduce;
}
/**
 * Emit block of compressed data
 *
 * Walks the matches selected in pCompressor->best_match and writes one command per match (token, optional extra literals
 * length, literal bytes, match offset, optional extra match length), followed by a final literals-only command. For raw
 * blocks, an end-of-data marker is appended. Each command written increments pCompressor->num_commands.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible (the output would not fit in
 *         nMaxOutDataSize bytes) or a match offset lies outside of MIN_OFFSET..MAX_OFFSET
 */
static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
   int i;
   int nNumLiterals = 0;
   int nInFirstLiteralOffset = 0;
   int nOutOffset = 0;
   int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
   int nRepMatchOffset = 0;

   for (i = nStartOffset; i < nEndOffset; ) {
      lzsa_match *pMatch = pCompressor->best_match + i;

      if (pMatch->length >= MIN_MATCH_SIZE_V2) {
         int nMatchOffset = pMatch->offset;
         int nMatchLen = pMatch->length;
         int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
         int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
         int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen;
         int nTokenOffsetMode;
         int nOffsetSize;
         int nCommandSize;

         /* Pick the offset encoding: repeat of the previous offset, or 5, 9, 13 or 16 bits. The top bit of the
          * negated offset is stored in the token for the 5, 9 and 13 bit modes. */
         if (nMatchOffset == nRepMatchOffset) {
            nTokenOffsetMode = 0xe0;
            nOffsetSize = 0;
         }
         else if (nMatchOffset <= 32) {
            nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
            nOffsetSize = 4;
         }
         else if (nMatchOffset <= 512) {
            nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
            nOffsetSize = 8;
         }
         else if (nMatchOffset <= (8192 + 512)) {
            nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
            nOffsetSize = 12;
         }
         else {
            nTokenOffsetMode = 0xc0;
            nOffsetSize = 16;
         }

         /* Size of the whole command in bits; rounded up to bytes for the room check */
         nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
         if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
            return -1;
         if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
            return -1;

         pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen;
         nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
         if (nOutOffset < 0) return -1;

         if (nNumLiterals != 0) {
            memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
            nOutOffset += nNumLiterals;
            nNumLiterals = 0;
         }

         /* Write the low bits of the negated offset. The shifts are done on unsigned values: right-shifting a
          * negative signed value is implementation-defined in C. */
         if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
            if (nOutOffset < 0) return -1;
         }
         else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
            pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
         }
         else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
            const unsigned int nNegOffset = (unsigned int)(-(nMatchOffset - 512));

            nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (int)((nNegOffset >> 8) & 0x0f));
            if (nOutOffset < 0) return -1;
            pOutData[nOutOffset++] = (unsigned char)(nNegOffset & 0xff);
         }
         else if (nTokenOffsetMode == 0xc0) {
            const unsigned int nNegOffset = (unsigned int)(-nMatchOffset);

            pOutData[nOutOffset++] = (unsigned char)((nNegOffset >> 8) & 0xff);
            pOutData[nOutOffset++] = (unsigned char)(nNegOffset & 0xff);
         }
         nRepMatchOffset = nMatchOffset;

         nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
         if (nOutOffset < 0) return -1;

         i += nMatchLen;
         pCompressor->num_commands++;
      }
      else {
         /* Literal: remember where the run starts and keep counting */
         if (nNumLiterals == 0)
            nInFirstLiteralOffset = i;
         nNumLiterals++;
         i++;
      }
   }

   /* Final command: the remaining literals (possibly none), without a real match */
   {
      int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);

      if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
         return -1;

      /* For raw blocks the token also carries the offset mode and match length bits of the EOD marker emitted below */
      if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47;
      else
         pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00;

      nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
      if (nOutOffset < 0) return -1;

      if (nNumLiterals != 0) {
         memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
         nOutOffset += nNumLiterals;
         nNumLiterals = 0;
      }

      pCompressor->num_commands++;
   }

   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
      /* Emit EOD marker for raw block */

      if (nOutOffset >= nMaxOutDataSize)
         return -1;
      pOutData[nOutOffset++] = 0;      /* Match offset */

      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15);   /* Extended match length nibble */
      if (nOutOffset < 0) return -1;

      if ((nOutOffset + 1) > nMaxOutDataSize)
         return -1;
      pOutData[nOutOffset++] = 232;    /* EOD match length byte */
   }

   /* Pad a half-filled nibble byte with a zero nibble so that the block ends on a byte boundary */
   if (nCurNibbleOffset != -1) {
      nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0);
      if (nOutOffset < 0 || nCurNibbleOffset != -1)
         return -1;
   }

   return nOutOffset;
}
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes at the start of the window; the block to compress starts there
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nBlockEndOffset = nPreviousBlockSize + nInDataSize;
   int nPass;

   lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nBlockEndOffset);

   /* Keep reducing the command count until a pass changes nothing, for at most 20 passes */
   for (nPass = 0; nPass < 20; nPass++) {
      if (!lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nBlockEndOffset))
         break;
   }

   return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nBlockEndOffset, pOutData, nMaxOutDataSize);
}

53
src/shrink_block_v2.h Normal file
View File

@ -0,0 +1,53 @@
/*
* shrink_block_v2.h - LZSA2 block compressor definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_BLOCK_V2_H
#define _SHRINK_BLOCK_V2_H
/* Forward declarations */
typedef struct _lzsa_compressor lzsa_compressor;
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes at the start of the window (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V2_H */

194
src/shrink_context.c Normal file
View File

@ -0,0 +1,194 @@
/*
* shrink_context.c - compression context implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_context.h"
#include "shrink_block_v1.h"
#include "shrink_block_v2.h"
#include "format.h"
#include "matchfinder.h"
/**
 * Initialize compression context
 *
 * All buffer pointers of the context are reset first, so that the context can always be handed to
 * lzsa_compressor_destroy(). On failure, everything that was allocated is released before returning.
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress), must be positive
 * @param nMinMatchSize minimum match size; clamped to the range from the minimum for the selected format up to 5
 * @param nFormatVersion version of the format to compress with (1 for LZSA1, 2 for LZSA2); the LZSA2-only buffers are
 *                       allocated for version 2 only
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero for failure
 */
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
   const int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
   /* Buffer sizes are computed in size_t so that the multiplications cannot overflow a signed int */
   const size_t nWindowEntries = (nMaxWindowSize > 0) ? (size_t)nMaxWindowSize : 0;
   int nResult;

   nResult = divsufsort_init(&pCompressor->divsufsort_context);
   pCompressor->intervals = NULL;
   pCompressor->pos_data = NULL;
   pCompressor->open_intervals = NULL;
   pCompressor->match = NULL;
   pCompressor->best_match = NULL;
   pCompressor->slot_cost = NULL;
   pCompressor->repmatch_opt = NULL;

   pCompressor->min_match_size = nMinMatchSize;
   if (pCompressor->min_match_size < nMinMatchSizeForFormat)
      pCompressor->min_match_size = nMinMatchSizeForFormat;
   else if (pCompressor->min_match_size > 5)
      pCompressor->min_match_size = 5;

   pCompressor->format_version = nFormatVersion;
   pCompressor->flags = nFlags;
   pCompressor->num_commands = 0;

   /* Give up if the suffix sorter failed to initialize, or if the window size cannot describe any data */
   if (nResult || nWindowEntries == 0)
      goto failed;

   pCompressor->intervals = (unsigned int *)malloc(nWindowEntries * sizeof(unsigned int));
   if (!pCompressor->intervals)
      goto failed;

   pCompressor->pos_data = (unsigned int *)malloc(nWindowEntries * sizeof(unsigned int));
   if (!pCompressor->pos_data)
      goto failed;

   pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
   if (!pCompressor->open_intervals)
      goto failed;

   pCompressor->match = (lzsa_match *)malloc(nWindowEntries * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
   if (!pCompressor->match)
      goto failed;

   /* The remaining buffers are only used by the LZSA2 optimizer */
   if (pCompressor->format_version != 2)
      return 0;

   pCompressor->best_match = (lzsa_match *)malloc(nWindowEntries * sizeof(lzsa_match));
   if (!pCompressor->best_match)
      goto failed;

   pCompressor->slot_cost = (int *)malloc(nWindowEntries * NMATCHES_PER_OFFSET * sizeof(int));
   if (!pCompressor->slot_cost)
      goto failed;

   pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nWindowEntries * sizeof(lzsa_repmatch_opt));
   if (!pCompressor->repmatch_opt)
      goto failed;

   return 0;

failed:
   lzsa_compressor_destroy(pCompressor);
   return 100;
}
/**
 * Clean up compression context and free up any associated resources
 *
 * Every buffer pointer is reset to NULL after being released, so calling this again on the same context does not
 * free the buffers a second time.
 *
 * @param pCompressor compression context to clean up
 */
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
   divsufsort_destroy(&pCompressor->divsufsort_context);

   /* free() accepts NULL, so buffers that were never allocated need no special case */
   free(pCompressor->repmatch_opt);
   pCompressor->repmatch_opt = NULL;

   free(pCompressor->slot_cost);
   pCompressor->slot_cost = NULL;

   free(pCompressor->best_match);
   pCompressor->best_match = NULL;

   free(pCompressor->match);
   pCompressor->match = NULL;

   free(pCompressor->open_intervals);
   pCompressor->open_intervals = NULL;

   free(pCompressor->pos_data);
   pCompressor->pos_data = NULL;

   free(pCompressor->intervals);
   pCompressor->intervals = NULL;
}
/**
 * Compress one block of data
 *
 * Builds the suffix array for the whole window, skips over the matches of the previously compressed bytes (if any),
 * finds the matches for the bytes to compress, and hands over to the block writer for the context's format version.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible, the suffix array could not
 *         be built, or the format version is not 1 or 2
 */
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nWindowSize = nPreviousBlockSize + nInDataSize;

   if (lzsa_build_suffix_array(pCompressor, pInWindow, nWindowSize) != 0)
      return -1;

   if (nPreviousBlockSize != 0)
      lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);

   lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nWindowSize);

   switch (pCompressor->format_version) {
   case 1:
      return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
   case 2:
      return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
   default:
      return -1;
   }
}
/**
 * Get the number of compression commands issued in compressed data blocks
 *
 * @param pCompressor compression context
 *
 * @return number of commands
 */
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
   const int nNumCommands = pCompressor->num_commands;

   return nNumCommands;
}

123
src/shrink_context.h Normal file
View File

@ -0,0 +1,123 @@
/*
* shrink_context.h - compression context definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_CONTEXT_H
#define _SHRINK_CONTEXT_H
#include "divsufsort.h"
/*
 * Bit-layout constants. LCP_MASK and POS_MASK split a 32-bit word into an
 * upper field of LCP_BITS (15) bits and a lower field of LCP_SHIFT (17) bits.
 * NOTE(review): "LCP" presumably stands for longest common prefix and the
 * lower field presumably holds a position - confirm against the match finder.
 */
/* Width, in bits, of the upper field */
#define LCP_BITS 15
/* 1 << (LCP_BITS - 1) = 16384 */
#define LCP_MAX (1U<<(LCP_BITS - 1))
/* Bit position where the upper field starts: 32 - 15 = 17 */
#define LCP_SHIFT (32-LCP_BITS)
/* Selects the upper LCP_BITS bits of a 32-bit word (0xFFFE0000) */
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
/* Selects the lower LCP_SHIFT bits of a 32-bit word (0x0001FFFF) */
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
/* NOTE(review): presumably the number of match candidates stored per position - confirm */
#define NMATCHES_PER_OFFSET 8
/* log2 of NMATCHES_PER_OFFSET (8 == 1 << 3); keep the two in sync */
#define MATCHES_PER_OFFSET_SHIFT 3
/* NOTE(review): presumably a match length from which the optimizer keeps a match untouched - confirm */
#define LEAVE_ALONE_MATCH_SIZE 1000
/* NOTE(review): presumably the number of trailing bytes in a block where matches may not start - confirm */
#define LAST_MATCH_OFFSET 4
/* NOTE(review): presumably the number of bytes at the end of a block that must be literals - confirm */
#define LAST_LITERALS 1
/* NOTE(review): presumably a cost added by the optimizer when switching between literals and matches - confirm */
#define MODESWITCH_PENALTY 1
/**
 * One match: a length/offset pair, each stored as an unsigned short.
 *
 * NOTE(review): the offset is presumably the backward distance to the matched
 * data - confirm against the match finder.
 */
typedef struct _lzsa_match {
/* Length of the match */
unsigned short length;
/* Offset of the match */
unsigned short offset;
} lzsa_match;
/**
 * One rep-match slot (for LZSA2)
 *
 * NOTE(review): the field descriptions below are inferred from the field names
 * only; confirm them against the LZSA2 optimizer before relying on them.
 */
typedef struct _lzsa_repmatch_opt {
/* presumably the rep-match offset in effect when this position is reached - confirm */
int incoming_offset;
/* presumably the index of the best slot for that incoming offset - confirm */
short best_slot_for_incoming;
/* presumably flags/identifies a rep-match expected at this position - confirm */
short expected_repmatch;
} lzsa_repmatch_opt;
/**
 * Compression context
 *
 * Holds the suffix-sort state, the working buffers and the settings used while
 * compressing blocks.
 * NOTE(review): buffer roles are inferred from their names unless stated
 * otherwise - confirm against the implementation.
 */
typedef struct _lzsa_compressor {
/* libdivsufsort state, embedded by value */
divsufsort_ctx_t divsufsort_context;
/* Heap buffer, released with free() by the context cleanup code */
unsigned int *intervals;
/* Heap buffer, released with free() by the context cleanup code */
unsigned int *pos_data;
/* Heap buffer, released with free() by the context cleanup code */
unsigned int *open_intervals;
/* Heap buffer of matches, released with free() by the context cleanup code */
lzsa_match *match;
/* Heap buffer of matches, released with free() by the context cleanup code */
lzsa_match *best_match;
/* Heap buffer of costs, released with free() by the context cleanup code */
int *slot_cost;
/* Rep-match slots (for LZSA2) */
lzsa_repmatch_opt *repmatch_opt;
/* Minimum match size (see lzsa_compressor_init) */
int min_match_size;
/* Format version: lzsa_compressor_shrink_block uses the v1 writer for 1, the v2 writer for 2, and fails otherwise */
int format_version;
/* Compression flags (see lzsa_compressor_init) */
int flags;
/* Command counter, reported by lzsa_compressor_get_command_count */
int num_commands;
} lzsa_compressor;
/**
 * Initialize compression context
 *
 * @param pCompressor compression context to initialize
 * @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
 * @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
 * @param nFormatVersion version of format to use (1-2)
 * @param nFlags compression flags
 *
 * @return 0 for success, non-zero for failure
 */
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
/**
 * Clean up compression context and free up any associated resources
 *
 * @param pCompressor compression context to clean up
 *                    (NOTE(review): presumably one previously set up with lzsa_compressor_init - confirm)
 */
void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
/**
 * Compress one block of data
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible;
 *         -1 is also returned when the suffix array cannot be built or when the
 *         context's format version is neither 1 nor 2
 */
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
 * Get the number of compression commands issued in compressed data blocks
 *
 * @param pCompressor compression context to query
 *
 * @return number of commands
 */
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
#endif /* _SHRINK_CONTEXT_H */

178
src/shrink_inmem.c Normal file
View File

@ -0,0 +1,178 @@
/*
* shrink_inmem.c - in-memory compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_inmem.h"
#include "shrink_context.h"
#include "frame.h"
#include "format.h"
#include "lib.h"
/**
 * Get maximum compressed size of input(source) data
 *
 * The bound is the sum of: the stream header, one block frame for every
 * BLOCK_SIZE chunk of input (rounded up), the input itself stored verbatim,
 * and the footer frame.
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
   /* Number of blocks, rounded up. Derived from BLOCK_SIZE itself rather than
    * from a hard-coded shift so that the two cannot drift apart. */
   const size_t nNumBlocks = (nInputSize + (BLOCK_SIZE - 1)) / BLOCK_SIZE;

   return lzsa_get_header_size() + nNumBlocks * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */;
}
/**
 * Get the space left in an output buffer, as an int suitable for the encoders
 *
 * The result is capped at BLOCK_SIZE, so the size_t to int conversion can never
 * overflow, and is 0 when the buffer is already full (or over-committed).
 *
 * @param nCapacity total capacity of the buffer, in bytes
 * @param nUsed number of bytes already used or reserved
 *
 * @return number of bytes left, between 0 and BLOCK_SIZE
 */
static int lzsa_inmem_space_left(const size_t nCapacity, const size_t nUsed) {
   size_t nLeft;

   if (nUsed >= nCapacity)
      return 0;

   nLeft = nCapacity - nUsed;
   if (nLeft > (size_t)BLOCK_SIZE)
      nLeft = (size_t)BLOCK_SIZE;
   return (int)nLeft;
}

/**
 * Compress memory
 *
 * Unless LZSA_FLAG_RAW_BLOCK is set, the output is a header, followed by one
 * framed block per BLOCK_SIZE chunk of input (stored uncompressed when the
 * chunk cannot be shrunk), followed by a footer frame. In raw block mode the
 * output is the bare compressed data of a single block: the call fails if the
 * input needs more than one block or if the block cannot be compressed.
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 for error (returned as (size_t)-1)
 */
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
                           const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
   lzsa_compressor compressor;
   size_t nOriginalSize = 0;
   size_t nCompressedSize = 0;
   const int nIsRawBlock = ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0);
   int nError = 0;

   if (lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags) != 0) {
      return -1;
   }

   if (!nIsRawBlock) {
      int nHeaderSize = lzsa_encode_header(pOutBuffer, lzsa_inmem_space_left(nMaxOutBufferSize, 0), nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else
         nCompressedSize += nHeaderSize;
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   while (nOriginalSize < nInputSize && !nError) {
      /* Clamp in size_t before narrowing: converting the full remainder to int
       * first would yield zero or a negative value for inputs above INT_MAX
       * and the loop would never advance. */
      const size_t nRemaining = nInputSize - nOriginalSize;
      const int nInDataSize = (nRemaining > (size_t)BLOCK_SIZE) ? BLOCK_SIZE : (int)nRemaining;

      if (nIsRawBlock && nNumBlocks) {
         /* Raw mode can only hold a single block */
         nError = LZSA_ERROR_RAW_TOOLARGE;
         break;
      }

      /* Raw output carries neither a block frame nor a footer, so nothing is
       * reserved for them and the payload starts right at the write position. */
      const size_t nFrameSize = nIsRawBlock ? 0 : (size_t)lzsa_get_frame_size();
      const int nOutDataEnd = lzsa_inmem_space_left(nMaxOutBufferSize, nCompressedSize + nFrameSize /* block frame */ + nFrameSize /* footer */);
      int nOutDataSize = -1;

      /* With no room left the block cannot be compressed in place; fall
       * through to the literal-block path, which reports the overflow. */
      if (nOutDataEnd > 0) {
         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + nFrameSize + nCompressedSize, nOutDataEnd);
      }

      if (nOutDataSize >= 0) {
         /* Write compressed block: the payload is already in place, only the
          * frame in front of it (if any) remains to be filled in */
         if (!nIsRawBlock) {
            int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, lzsa_inmem_space_left(nMaxOutBufferSize, nCompressedSize), nOutDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else
               nCompressedSize += nBlockheaderSize;
         }

         /* Account for the block in framed AND raw mode; skipping this in raw
          * mode left the loop stuck on the first block. */
         if (!nError) {
            nOriginalSize += nInDataSize;
            nCompressedSize += nOutDataSize;
         }
      }
      else {
         /* Write uncompressible, literal block */
         if (nIsRawBlock) {
            nError = LZSA_ERROR_RAW_UNCOMPRESSED;
            break;
         }

         int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, lzsa_inmem_space_left(nMaxOutBufferSize, nCompressedSize), nInDataSize);
         if (nBlockheaderSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
         else if (nCompressedSize + nBlockheaderSize > nMaxOutBufferSize ||
                  (size_t)nInDataSize > nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize))
            nError = LZSA_ERROR_DST;
         else {
            memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
            nOriginalSize += nInDataSize;
            nCompressedSize += nBlockheaderSize + nInDataSize;
         }
      }

      nPreviousBlockSize = nInDataSize;
      nNumBlocks++;
   }

   if (!nError && !nIsRawBlock) {
      int nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, lzsa_inmem_space_left(nMaxOutBufferSize, nCompressedSize));
      if (nFooterSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else
         nCompressedSize += nFooterSize;
   }

   lzsa_compressor_destroy(&compressor);

   if (nError) {
      return -1;
   }
   else {
      return nCompressedSize;
   }
}

64
src/shrink_inmem.h Normal file
View File

@ -0,0 +1,64 @@
/*
* shrink_inmem.h - in-memory compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_INMEM_H
#define _SHRINK_INMEM_H
#include <stdlib.h>
/**
 * Get maximum compressed size of input(source) data
 *
 * @param nInputSize input(source) size in bytes
 *
 * @return maximum compressed size
 */
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
/**
 * Compress memory
 *
 * @param pInputData pointer to input(source) data to compress
 * @param pOutBuffer buffer for compressed data
 * @param nInputSize input(source) size in bytes
 * @param nMaxOutBufferSize maximum capacity of compression buffer
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 *
 * @return actual compressed size, or -1 for error; since the return type is
 *         size_t, the error value reaches the caller as (size_t)-1
 */
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
#endif /* _SHRINK_INMEM_H */

285
src/shrink_streaming.c Normal file
View File

@ -0,0 +1,285 @@
/*
* shrink_streaming.c - streaming compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdlib.h>
#include <string.h>
#include "shrink_streaming.h"
#include "format.h"
#include "frame.h"
#include "lib.h"
/*-------------- File API -------------- */
/**
 * Compress file
 *
 * Opens the input file for binary reading and the output file for binary
 * writing, loads the optional dictionary, runs lzsa_compress_stream() over the
 * two streams, then releases the dictionary and closes both streams.
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 *         (LZSA_ERROR_SRC if the input cannot be opened, LZSA_ERROR_DST if the output cannot be opened)
 */
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                 void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
   lzsa_stream_t srcStream, dstStream;
   void *pDictContents = NULL;
   int nDictSize = 0;
   lzsa_status_t nOutcome;

   if (lzsa_filestream_open(&srcStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&dstStream, pszOutFilename, "wb") < 0) {
      srcStream.close(&srcStream);
      return LZSA_ERROR_DST;
   }

   /* Compress only when the dictionary loaded; both outcomes share the stream cleanup below */
   nOutcome = lzsa_dictionary_load(pszDictionaryFilename, &pDictContents, &nDictSize);
   if (!nOutcome) {
      nOutcome = lzsa_compress_stream(&srcStream, &dstStream, pDictContents, nDictSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount);
      lzsa_dictionary_free(&pDictContents);
   }

   dstStream.close(&dstStream);
   srcStream.close(&srcStream);
   return nOutcome;
}
/*-------------- Streaming API -------------- */
/**
 * Compress stream
 *
 * Input is read in chunks of up to BLOCK_SIZE bytes into the upper half of a
 * 2 * BLOCK_SIZE window; the previous block (or, before the first block, the
 * dictionary) is kept right below it as history. Unless LZSA_FLAG_RAW_BLOCK is
 * set, the output is a header, one framed block per chunk (stored uncompressed
 * when the chunk cannot be shrunk) and a footer frame. In raw block mode only
 * the bare compressed data of a single block is written.
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
                                   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
   unsigned char *pInData, *pOutData;
   lzsa_compressor compressor;
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   int nResult;
   unsigned char cFrameData[16];
   int nError = 0;

   pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pInData) {
      return LZSA_ERROR_MEMORY;
   }
   memset(pInData, 0, BLOCK_SIZE * 2);

   pOutData = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pOutData) {
      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }
   memset(pOutData, 0, BLOCK_SIZE);

   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      free(pOutData);
      pOutData = NULL;

      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }

   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != (size_t)nHeaderSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nHeaderSize;
      }
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   while (!pInStream->eof(pInStream) && !nError) {
      int nInDataSize;

      if (nPreviousBlockSize) {
         /* Slide the previous block down so that it ends where the new block starts */
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
      }
      else if (nDictionaryDataSize && pDictionaryData) {
         /* No previous block yet: seed the history with the dictionary.
          * NOTE(review): nDictionaryDataSize is not checked here; a value above
          * BLOCK_SIZE (or a negative one) would write outside pInData, so
          * callers must pass 0..BLOCK_SIZE - confirm that lzsa_dictionary_load
          * guarantees this. */
         nPreviousBlockSize = nDictionaryDataSize;
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
      }

      nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
      if (nInDataSize > 0) {
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
            /* Raw mode can only hold a single block */
            nError = LZSA_ERROR_RAW_TOOLARGE;
            break;
         }
         /* The dictionary only serves as history for the first block */
         nDictionaryDataSize = 0;

         int nOutDataSize;

         /* The output budget never exceeds the input size, so a block that
          * does not shrink is reported as uncompressible (-1) */
         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
         if (nOutDataSize >= 0) {
            /* Write compressed block */

            if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
               int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
               if (nBlockheaderSize < 0)
                  nError = LZSA_ERROR_COMPRESSION;
               else {
                  nCompressedSize += (long long)nBlockheaderSize;
                  if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                     nError = LZSA_ERROR_DST;
                  }
               }
            }

            if (!nError) {
               if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  nOriginalSize += (long long)nInDataSize;
                  nCompressedSize += (long long)nOutDataSize;
               }
            }
         }
         else {
            /* Write uncompressible, literal block */

            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
               break;
            }

            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
                     nError = LZSA_ERROR_DST;
                  }
                  else {
                     nOriginalSize += (long long)nInDataSize;
                     nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
                  }
               }
            }
         }

         nPreviousBlockSize = nInDataSize;
         nNumBlocks++;
      }

      if (!nError && !pInStream->eof(pInStream)) {
         if (progress)
            progress(nOriginalSize, nCompressedSize);
      }
   }

   /* Raw output has no footer. Only write one when it was actually encoded:
    * writing after a failed encode would convert a negative size to a huge
    * size_t and read far beyond cFrameData. */
   if (!nError && (nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      int nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
      if (nFooterSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != (size_t)nFooterSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nFooterSize;
      }
   }

   if (progress)
      progress(nOriginalSize, nCompressedSize);

   /* Read the command count before the context is torn down */
   int nCommandCount = lzsa_compressor_get_command_count(&compressor);
   lzsa_compressor_destroy(&compressor);

   free(pOutData);
   pOutData = NULL;

   free(pInData);
   pInData = NULL;

   if (nError) {
      return nError;
   }
   else {
      if (pOriginalSize)
         *pOriginalSize = nOriginalSize;
      if (pCompressedSize)
         *pCompressedSize = nCompressedSize;
      if (pCommandCount)
         *pCommandCount = nCommandCount;
      return LZSA_OK;
   }
}

86
src/shrink_streaming.h Normal file
View File

@ -0,0 +1,86 @@
/*
* shrink_streaming.h - streaming compression definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _SHRINK_STREAMING_H
#define _SHRINK_STREAMING_H
#include "stream.h"
/* Forward declaration of the status type returned by the functions below.
 * NOTE(review): ISO C only allows `enum tag` without an enumerator list after
 * the enum type is complete (C11 6.7.2.3), so this line relies on a compiler
 * extension unless the header defining enum _lzsa_status_t is included first
 * - confirm. */
typedef enum _lzsa_status_t lzsa_status_t;
/*-------------- File API -------------- */
/**
 * Compress file
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 *         (LZSA_ERROR_SRC if the input file cannot be opened, LZSA_ERROR_DST if the output file cannot be opened)
 */
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
/*-------------- Streaming API -------------- */
/**
 * Compress stream
 *
 * When LZSA_FLAG_RAW_BLOCK is set in nFlags, the input must fit in a single
 * block (otherwise LZSA_ERROR_RAW_TOOLARGE is returned) and that block must be
 * compressible (otherwise LZSA_ERROR_RAW_UNCOMPRESSED is returned).
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called after compressing each block, or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 *         (LZSA_ERROR_MEMORY if the work buffers or the compression context cannot be set up,
 *         LZSA_ERROR_DST if writing to the output stream fails)
 */
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
#endif /* _SHRINK_STREAMING_H */