mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-04-06 05:41:29 +00:00
Split code, add automated tests, update LZSA2
This commit is contained in:
parent
45cb124c4d
commit
b4e3c07d3a
101
src/dictionary.c
Normal file
101
src/dictionary.c
Normal file
@ -0,0 +1,101 @@
|
||||
/*
|
||||
* dictionary.c - dictionary implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "format.h"
|
||||
#include "lib.h"
|
||||
|
||||
/**
|
||||
* Load dictionary contents
|
||||
*
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
|
||||
unsigned char *pDictionaryData = NULL;
|
||||
int nDictionaryDataSize = 0;
|
||||
|
||||
if (pszDictionaryFilename) {
|
||||
pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
|
||||
if (!pDictionaryData) {
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
|
||||
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
|
||||
if (!pDictionaryFile) {
|
||||
free(pDictionaryData);
|
||||
pDictionaryData = NULL;
|
||||
return LZSA_ERROR_DICTIONARY;
|
||||
}
|
||||
|
||||
fseek(pDictionaryFile, 0, SEEK_END);
|
||||
#ifdef _WIN32
|
||||
__int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
|
||||
#else
|
||||
off_t nDictionaryFileSize = ftello(pDictionaryFile);
|
||||
#endif
|
||||
if (nDictionaryFileSize > BLOCK_SIZE) {
|
||||
/* Use the last BLOCK_SIZE bytes of the dictionary */
|
||||
fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
|
||||
}
|
||||
else {
|
||||
fseek(pDictionaryFile, 0, SEEK_SET);
|
||||
}
|
||||
|
||||
nDictionaryDataSize = (int)fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);
|
||||
if (nDictionaryDataSize < 0)
|
||||
nDictionaryDataSize = 0;
|
||||
|
||||
fclose(pDictionaryFile);
|
||||
pDictionaryFile = NULL;
|
||||
}
|
||||
|
||||
*ppDictionaryData = pDictionaryData;
|
||||
*pDictionaryDataSize = nDictionaryDataSize;
|
||||
return LZSA_OK;
|
||||
}
|
||||
|
||||
/**
 * Free dictionary contents
 *
 * Releases the buffer and resets the caller's pointer to NULL, so calling this
 * function again on the same pointer is harmless.
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents; may be NULL, or point to a NULL pointer, in which case nothing is done
 */
void lzsa_dictionary_free(void **ppDictionaryData) {
   if (ppDictionaryData && *ppDictionaryData) {
      free(*ppDictionaryData);
      /* Clear the caller's pointer (not the local parameter) so it cannot be freed or dereferenced again */
      *ppDictionaryData = NULL;
   }
}
|
56
src/dictionary.h
Normal file
56
src/dictionary.h
Normal file
@ -0,0 +1,56 @@
|
||||
/*
|
||||
* dictionary.h - dictionary definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _DICTIONARY_H
|
||||
#define _DICTIONARY_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
 * Load dictionary contents
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
 * @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
|
||||
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
|
||||
|
||||
/**
 * Free dictionary contents
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents
 */
|
||||
void lzsa_dictionary_free(void **ppDictionaryData);
|
||||
|
||||
#endif /* _DICTIONARY_H */
|
217
src/expand_block_v1.c
Normal file
217
src/expand_block_v1.c
Normal file
@ -0,0 +1,217 @@
|
||||
/*
|
||||
* expand_block_v1.c - LZSA1 block decompressor implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "format.h"
|
||||
#include "expand_block_v1.h"
|
||||
|
||||
/* Compiler-specific spelling of "always inline this function" */
#if defined(_MSC_VER)
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */

/**
 * Decode the extra bytes of an LZSA1 literals count
 *
 * The first extra byte is added to the count already held in *nLiterals.
 * Two values of that byte introduce a longer encoding:
 *  - 250: one more byte follows, and the count becomes 256 + that byte
 *  - 249: two more bytes follow, and the count becomes their 16-bit little-endian value
 *
 * @param ppInBlock pointer to the current read position; advanced past the consumed bytes on success only
 * @param pInBlockEnd end of the compressed data (one past the last readable byte)
 * @param nLiterals pointer to the literals count to extend; may already be modified when an error is returned
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_literals_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nLiterals) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nLiterals += nExtra;

   switch (nExtra) {
      case 250:
         /* One extra byte is needed */
         if (pCursor >= pInBlockEnd)
            return -1;
         *nLiterals = 256 + (unsigned int)pCursor[0];
         pCursor += 1;
         break;

      case 249:
         /* Two extra bytes are needed */
         if ((pCursor + 1) >= pInBlockEnd)
            return -1;
         *nLiterals = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
         pCursor += 2;
         break;

      default:
         break;
   }

   *ppInBlock = pCursor;
   return 0;
}
|
||||
|
||||
/**
 * Decode the extra bytes of an LZSA1 match length
 *
 * The first extra byte is added to the length already held in *nMatchLen.
 * Two values of that byte introduce a longer encoding:
 *  - 239: one more byte follows, and the length becomes 256 + that byte
 *  - 238: two more bytes follow, and the length becomes their 16-bit little-endian value
 *
 * @param ppInBlock pointer to the current read position; advanced past the consumed bytes on success only
 * @param pInBlockEnd end of the compressed data (one past the last readable byte)
 * @param nMatchLen pointer to the match length to extend; may already be modified when an error is returned
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, unsigned int *nMatchLen) {
   const unsigned char *pCursor = *ppInBlock;
   unsigned int nExtra;

   if (pCursor >= pInBlockEnd)
      return -1;

   nExtra = *pCursor++;
   *nMatchLen += nExtra;

   switch (nExtra) {
      case 239:
         /* One extra byte is needed */
         if (pCursor >= pInBlockEnd)
            return -1;
         *nMatchLen = 256 + (unsigned int)pCursor[0];
         pCursor += 1;
         break;

      case 238:
         /* Two extra bytes are needed */
         if ((pCursor + 1) >= pInBlockEnd)
            return -1;
         *nMatchLen = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
         pCursor += 2;
         break;

      default:
         break;
   }

   *ppInBlock = pCursor;
   return 0;
}
|
||||
|
||||
/**
|
||||
* Decompress one LZSA1 data block
|
||||
*
|
||||
* @param pInBlock pointer to compressed data
|
||||
* @param nInBlockSize size of compressed data, in bytes
|
||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||
*
|
||||
* @return size of decompressed data in bytes, or -1 for error
|
||||
*/
|
||||
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
|
||||
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
|
||||
unsigned char *pCurOutData = pOutData + nOutDataOffset;
|
||||
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
|
||||
const unsigned char *pOutDataFastEnd = pOutDataEnd - 18;
|
||||
|
||||
while (pInBlock < pInBlockEnd) {
|
||||
const unsigned char token = *pInBlock++;
|
||||
unsigned int nLiterals = (unsigned int)((token & 0x70) >> 4);
|
||||
|
||||
if (nLiterals != LITERALS_RUN_LEN_V1 && (pInBlock + 8) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
|
||||
memcpy(pCurOutData, pInBlock, 8);
|
||||
pInBlock += nLiterals;
|
||||
pCurOutData += nLiterals;
|
||||
}
|
||||
else {
|
||||
if (nLiterals == LITERALS_RUN_LEN_V1) {
|
||||
if (lzsa_build_literals_len_v1(&pInBlock, pInBlockEnd, &nLiterals))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nLiterals != 0) {
|
||||
if ((pInBlock + nLiterals) <= pInBlockEnd &&
|
||||
(pCurOutData + nLiterals) <= pOutDataEnd) {
|
||||
memcpy(pCurOutData, pInBlock, nLiterals);
|
||||
pInBlock += nLiterals;
|
||||
pCurOutData += nLiterals;
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
|
||||
unsigned int nMatchOffset;
|
||||
|
||||
nMatchOffset = ((unsigned int)(*pInBlock++)) ^ 0xff;
|
||||
if (token & 0x80) {
|
||||
nMatchOffset |= (((unsigned int)(*pInBlock++)) << 8) ^ 0xff00;
|
||||
}
|
||||
nMatchOffset++;
|
||||
|
||||
const unsigned char *pSrc = pCurOutData - nMatchOffset;
|
||||
if (pSrc >= pOutData) {
|
||||
unsigned int nMatchLen = (unsigned int)(token & 0x0f);
|
||||
if (nMatchLen != MATCH_RUN_LEN_V1 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
|
||||
memcpy(pCurOutData, pSrc, 8);
|
||||
memcpy(pCurOutData + 8, pSrc + 8, 8);
|
||||
memcpy(pCurOutData + 16, pSrc + 16, 2);
|
||||
pCurOutData += (MIN_MATCH_SIZE_V1 + nMatchLen);
|
||||
}
|
||||
else {
|
||||
nMatchLen += MIN_MATCH_SIZE_V1;
|
||||
if (nMatchLen == (MATCH_RUN_LEN_V1 + MIN_MATCH_SIZE_V1)) {
|
||||
if (lzsa_build_match_len_v1(&pInBlock, pInBlockEnd, &nMatchLen))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
|
||||
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
|
||||
|
||||
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
|
||||
const unsigned char *pCopySrc = pSrc;
|
||||
unsigned char *pCopyDst = pCurOutData;
|
||||
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
|
||||
|
||||
do {
|
||||
memcpy(pCopyDst, pCopySrc, 16);
|
||||
pCopySrc += 16;
|
||||
pCopyDst += 16;
|
||||
} while (pCopyDst < pCopyEndDst);
|
||||
|
||||
pCurOutData += nMatchLen;
|
||||
}
|
||||
else {
|
||||
while (nMatchLen) {
|
||||
*pCurOutData++ = *pSrc++;
|
||||
nMatchLen--;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (int)(pCurOutData - (pOutData + nOutDataOffset));
|
||||
}
|
49
src/expand_block_v1.h
Normal file
49
src/expand_block_v1.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* expand_block_v1.h - LZSA1 block decompressor definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _EXPAND_V1_H
|
||||
#define _EXPAND_V1_H
|
||||
|
||||
/**
 * Decompress one LZSA1 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written starting at nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
|
||||
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
|
||||
|
||||
#endif /* _EXPAND_V1_H */
|
242
src/expand_block_v2.c
Normal file
242
src/expand_block_v2.c
Normal file
@ -0,0 +1,242 @@
|
||||
/*
|
||||
* expand_block_v2.c - LZSA2 block decompressor implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "format.h"
|
||||
#include "expand_block_v2.h"
|
||||
|
||||
#ifdef _MSC_VER
#define FORCE_INLINE __forceinline
#else /* _MSC_VER */
#define FORCE_INLINE __attribute__((always_inline))
#endif /* _MSC_VER */

/**
 * Fetch the next 4-bit value from the LZSA2 nibble stream
 *
 * Nibbles are stored two per byte. Each call toggles *nCurNibbles: when it becomes
 * non-zero a new byte is read from the input and its high nibble is returned; on
 * the following call the low nibble of that same byte is returned, without
 * consuming any input.
 *
 * @param ppInBlock pointer to the current read position; advanced by one byte when a new nibble pair is read
 * @param pInBlockEnd end of the compressed data (one past the last readable byte)
 * @param nCurNibbles pointer to the nibble toggle (start it at 0); it is toggled even when an error is returned
 * @param nibbles pointer to the byte holding the current pair of nibbles
 * @param nValue pointer to the returned nibble value (0-15)
 *
 * @return 0 for success, -1 if a new byte is needed but the compressed data is exhausted
 */
static inline FORCE_INLINE int lzsa_get_nibble_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nValue) {
   if ((*nCurNibbles ^= 1) != 0) {
      /* No pending nibble: load a new pair and hand out the high one */
      const unsigned char *pInBlock = *ppInBlock;

      if (pInBlock >= pInBlockEnd)
         return -1;

      (*nibbles) = *pInBlock++;
      *ppInBlock = pInBlock;
      (*nValue) = ((unsigned int)((*nibbles) & 0xf0)) >> 4;
      return 0;
   }

   /* Hand out the low nibble of the byte read by the previous call */
   (*nValue) = (unsigned int)((*nibbles) & 0x0f);
   return 0;
}
|
||||
|
||||
/**
 * Decode the extra nibble/bytes of an LZSA2 literals count or match length
 *
 * A nibble is read and added to *nLength. If that nibble is 15, one more byte is
 * read and added as well; if the total then equals exactly 257, two further bytes
 * are read and the length becomes their 16-bit little-endian value.
 *
 * @param ppInBlock pointer to the current read position; advanced past the consumed bytes
 * @param pInBlockEnd end of the compressed data (one past the last readable byte)
 * @param nCurNibbles pointer to the nibble toggle shared with lzsa_get_nibble_v2()
 * @param nibbles pointer to the byte holding the current pair of nibbles
 * @param nLength pointer to the length to extend; may already be modified when an error is returned
 *
 * @return 0 for success, -1 if the compressed data ends prematurely
 */
static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock, const unsigned char *pInBlockEnd, int *nCurNibbles, unsigned char *nibbles, unsigned int *nLength) {
   unsigned int nNibble;
   const unsigned char *pCursor;

   if (lzsa_get_nibble_v2(ppInBlock, pInBlockEnd, nCurNibbles, nibbles, &nNibble))
      return -1;

   *nLength += nNibble;
   if (nNibble != 15)
      return 0;

   /* Saturated nibble: an extra byte follows */
   pCursor = *ppInBlock;
   if (pCursor >= pInBlockEnd)
      return -1;
   *nLength += (unsigned int)*pCursor++;

   if (*nLength == 257) {
      /* Escape value: the real length is stored in the next two bytes */
      if ((pCursor + 1) >= pInBlockEnd)
         return -1;
      *nLength = (unsigned int)pCursor[0] | (((unsigned int)pCursor[1]) << 8);
      pCursor += 2;
   }

   *ppInBlock = pCursor;
   return 0;
}
|
||||
|
||||
/**
|
||||
* Decompress one LZSA2 data block
|
||||
*
|
||||
* @param pInBlock pointer to compressed data
|
||||
* @param nInBlockSize size of compressed data, in bytes
|
||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||
*
|
||||
* @return size of decompressed data in bytes, or -1 for error
|
||||
*/
|
||||
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
|
||||
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
|
||||
unsigned char *pCurOutData = pOutData + nOutDataOffset;
|
||||
const unsigned char *pOutDataEnd = pCurOutData + nBlockMaxSize;
|
||||
const unsigned char *pOutDataFastEnd = pOutDataEnd - 20;
|
||||
int nCurNibbles = 0;
|
||||
unsigned char nibbles;
|
||||
int nMatchOffset = 0;
|
||||
|
||||
while (pInBlock < pInBlockEnd) {
|
||||
const unsigned char token = *pInBlock++;
|
||||
unsigned int nLiterals = (unsigned int)((token & 0x18) >> 3);
|
||||
|
||||
if (nLiterals != LITERALS_RUN_LEN_V2 && (pInBlock + 4) <= pInBlockEnd && pCurOutData < pOutDataFastEnd) {
|
||||
memcpy(pCurOutData, pInBlock, 4);
|
||||
pInBlock += nLiterals;
|
||||
pCurOutData += nLiterals;
|
||||
}
|
||||
else {
|
||||
if (nLiterals == LITERALS_RUN_LEN_V2) {
|
||||
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nLiterals))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nLiterals != 0) {
|
||||
if ((pInBlock + nLiterals) <= pInBlockEnd &&
|
||||
(pCurOutData + nLiterals) <= pOutDataEnd) {
|
||||
memcpy(pCurOutData, pInBlock, nLiterals);
|
||||
pInBlock += nLiterals;
|
||||
pCurOutData += nLiterals;
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if ((pInBlock + 1) < pInBlockEnd) { /* The last token in the block does not include match information */
|
||||
unsigned char nOffsetMode = token & 0xc0;
|
||||
unsigned int nValue;
|
||||
|
||||
switch (nOffsetMode) {
|
||||
case 0x00:
|
||||
/* 5 bit offset */
|
||||
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
||||
return -1;
|
||||
nMatchOffset = nValue;
|
||||
nMatchOffset |= ((token & 0x20) >> 1);
|
||||
nMatchOffset ^= 0x1f;
|
||||
nMatchOffset++;
|
||||
break;
|
||||
|
||||
case 0x40:
|
||||
/* 9 bit offset */
|
||||
nMatchOffset = (unsigned int)(*pInBlock++);
|
||||
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
|
||||
nMatchOffset ^= 0x1ff;
|
||||
nMatchOffset++;
|
||||
break;
|
||||
|
||||
case 0x80:
|
||||
/* 13 bit offset */
|
||||
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
||||
return -1;
|
||||
nMatchOffset = (unsigned int)(*pInBlock++);
|
||||
nMatchOffset |= (nValue << 8);
|
||||
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
|
||||
nMatchOffset ^= 0x1fff;
|
||||
nMatchOffset += (512 + 1);
|
||||
break;
|
||||
|
||||
default:
|
||||
/* Check if this is a 16 bit offset or a rep-match */
|
||||
if ((token & 0x20) == 0) {
|
||||
/* 16 bit offset */
|
||||
nMatchOffset = (((unsigned int)(*pInBlock++)) << 8);
|
||||
nMatchOffset |= (unsigned int)(*pInBlock++);
|
||||
nMatchOffset ^= 0xffff;
|
||||
nMatchOffset++;
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
const unsigned char *pSrc = pCurOutData - nMatchOffset;
|
||||
if (pSrc >= pOutData) {
|
||||
unsigned int nMatchLen = (unsigned int)(token & 0x07);
|
||||
if (nMatchLen != MATCH_RUN_LEN_V2 && nMatchOffset >= 8 && pCurOutData < pOutDataFastEnd) {
|
||||
memcpy(pCurOutData, pSrc, 8);
|
||||
memcpy(pCurOutData + 8, pSrc + 8, 2);
|
||||
pCurOutData += (MIN_MATCH_SIZE_V2 + nMatchLen);
|
||||
}
|
||||
else {
|
||||
nMatchLen += MIN_MATCH_SIZE_V2;
|
||||
if (nMatchLen == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
|
||||
if (lzsa_build_len_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nMatchLen))
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((pCurOutData + nMatchLen) <= pOutDataEnd) {
|
||||
/* Do a deterministic, left to right byte copy instead of memcpy() so as to handle overlaps */
|
||||
|
||||
if (nMatchOffset >= 16 && (pCurOutData + nMatchLen) < (pOutDataFastEnd - 15)) {
|
||||
const unsigned char *pCopySrc = pSrc;
|
||||
unsigned char *pCopyDst = pCurOutData;
|
||||
const unsigned char *pCopyEndDst = pCurOutData + nMatchLen;
|
||||
|
||||
do {
|
||||
memcpy(pCopyDst, pCopySrc, 16);
|
||||
pCopySrc += 16;
|
||||
pCopyDst += 16;
|
||||
} while (pCopyDst < pCopyEndDst);
|
||||
|
||||
pCurOutData += nMatchLen;
|
||||
}
|
||||
else {
|
||||
while (nMatchLen) {
|
||||
*pCurOutData++ = *pSrc++;
|
||||
nMatchLen--;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (int)(pCurOutData - (pOutData + nOutDataOffset));
|
||||
}
|
49
src/expand_block_v2.h
Normal file
49
src/expand_block_v2.h
Normal file
@ -0,0 +1,49 @@
|
||||
/*
|
||||
* expand_block_v2.h - LZSA2 block decompressor definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _EXPAND_V2_H
|
||||
#define _EXPAND_V2_H
|
||||
|
||||
/**
 * Decompress one LZSA2 data block
 *
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize maximum number of bytes that may be written starting at nOutDataOffset
 *
 * @return size of decompressed data in bytes, or -1 for error
 */
|
||||
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
|
||||
|
||||
#endif /* _EXPAND_V2_H */
|
57
src/expand_context.c
Normal file
57
src/expand_context.c
Normal file
@ -0,0 +1,57 @@
|
||||
/*
|
||||
* expand_context.c - decompressor context implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "expand_context.h"
|
||||
#include "expand_block_v1.h"
|
||||
#include "expand_block_v2.h"
|
||||
|
||||
/**
 * Decompress one data block
 *
 * Dispatches to the block decompressor that matches the requested format version.
 *
 * @param nFormatVersion version of the compressed format: 1 for LZSA1, 2 for LZSA2
 * @param pInBlock pointer to compressed data
 * @param nBlockSize size of compressed data, in bytes
 * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
 * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
 * @param nBlockMaxSize passed through unchanged to the selected block decompressor as its output size limit
 *
 * @return size of decompressed data in bytes, or -1 for error (including an unsupported format version)
 */
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
   switch (nFormatVersion) {
      case 1:
         return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);

      case 2:
         return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);

      default:
         /* Unknown format version */
         return -1;
   }
}
|
51
src/expand_context.h
Normal file
51
src/expand_context.h
Normal file
@ -0,0 +1,51 @@
|
||||
/*
|
||||
* expand_context.h - decompressor context definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _EXPAND_CONTEXT_H
|
||||
#define _EXPAND_CONTEXT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* Decompress one data block
|
||||
*
|
||||
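* @param nFormatVersion format version of the compressed data (1 or 2); any other value makes the function return -1
|
||||
* @param pInBlock pointer to compressed data
|
||||
* @param nBlockSize size of compressed data, in bytes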
|
||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||
*
|
||||
* @return size of decompressed data in bytes, or -1 for error
|
||||
*/
|
||||
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
|
||||
|
||||
#endif /* _EXPAND_CONTEXT_H */
|
160
src/expand_inmem.c
Normal file
160
src/expand_inmem.c
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
* expand_inmem.c - in-memory decompression implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "expand_inmem.h"
|
||||
#include "lib.h"
|
||||
#include "frame.h"
|
||||
|
||||
#define BLOCK_SIZE 65536
|
||||
|
||||
/**
|
||||
* Get maximum decompressed size of compressed data
|
||||
*
|
||||
* @param pFileData compressed data
|
||||
* @param nFileSize compressed size in bytes
|
||||
*
|
||||
* @return maximum decompressed size
|
||||
*/
|
||||
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize) {
|
||||
const unsigned char *pCurFileData = pFileData;
|
||||
const unsigned char *pEndFileData = pCurFileData + nFileSize;
|
||||
int nFormatVersion = 0;
|
||||
size_t nMaxDecompressedSize = 0;
|
||||
const int nHeaderSize = lzsa_get_header_size();
|
||||
|
||||
/* Check header */
|
||||
if ((pCurFileData + nHeaderSize) > pEndFileData ||
|
||||
lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
|
||||
return -1;
|
||||
|
||||
pCurFileData += nHeaderSize;
|
||||
|
||||
while (pCurFileData < pEndFileData) {
|
||||
unsigned int nBlockDataSize = 0;
|
||||
int nIsUncompressed = 0;
|
||||
const int nFrameSize = lzsa_get_frame_size();
|
||||
|
||||
/* Decode frame header */
|
||||
if ((pCurFileData + nFrameSize) > pEndFileData ||
|
||||
lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
|
||||
return -1;
|
||||
pCurFileData += nFrameSize;
|
||||
|
||||
if (!nBlockDataSize)
|
||||
break;
|
||||
|
||||
/* Add one potentially full block to the decompressed size */
|
||||
nMaxDecompressedSize += BLOCK_SIZE;
|
||||
|
||||
if ((pCurFileData + nBlockDataSize) > pEndFileData)
|
||||
return -1;
|
||||
|
||||
pCurFileData += nBlockDataSize;
|
||||
}
|
||||
|
||||
return nMaxDecompressedSize;
|
||||
}
|
||||
|
||||
/**
 * Decompress data in memory
 *
 * Decodes the stream header, then processes frames one by one: compressed blocks are
 * expanded with lzsa_decompressor_expand_block(), stored (uncompressed) blocks are
 * copied verbatim. Processing stops at the first frame whose block size is 0, or at
 * the end of the input.
 *
 * @param pFileData compressed data
 * @param pOutBuffer buffer for decompressed data
 * @param nFileSize compressed size in bytes
 * @param nMaxOutBufferSize maximum capacity of decompression buffer
 * @param pFormatVersion pointer to format version, updated if this function is successful
 *
 * @return actual decompressed size, or -1 (as a size_t) for error
 */
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion) {
   const unsigned char *pCurFileData = pFileData;
   const unsigned char *pEndFileData = pCurFileData + nFileSize;
   unsigned char *pCurOutBuffer = pOutBuffer;
   const unsigned char *pEndOutBuffer = pCurOutBuffer + nMaxOutBufferSize;
   int nFormatVersion = 0;
   int nPreviousBlockSize = 0;
   const int nHeaderSize = lzsa_get_header_size();

   /* Check header. Sizes are compared against the number of bytes that remain instead of
    * forming a pointer past the end of a buffer, which is undefined behavior */
   if (nHeaderSize < 0 || (size_t)nHeaderSize > nFileSize ||
       lzsa_decode_header(pCurFileData, nHeaderSize, &nFormatVersion) != 0)
      return (size_t)-1;

   pCurFileData += nHeaderSize;

   while (pCurFileData < pEndFileData) {
      unsigned int nBlockDataSize = 0;
      int nIsUncompressed = 0;
      const int nFrameSize = lzsa_get_frame_size();

      /* Decode frame header */
      if (nFrameSize < 0 || (size_t)nFrameSize > (size_t)(pEndFileData - pCurFileData) ||
          lzsa_decode_frame(pCurFileData, nFrameSize, &nBlockDataSize, &nIsUncompressed) != 0)
         return (size_t)-1;
      pCurFileData += nFrameSize;

      /* A zero-sized block ends the stream */
      if (!nBlockDataSize)
         break;

      /* The block payload must be entirely present in the input, whichever kind it is */
      if (nBlockDataSize > (size_t)(pEndFileData - pCurFileData))
         return (size_t)-1;

      if (!nIsUncompressed) {
         int nDecompressedSize;

         /* Decompress block. The output window handed to the block decompressor starts at the
          * previous block so that matches can reach back into it.
          * NOTE(review): the window size is narrowed to int; confirm that output buffers
          * larger than INT_MAX are not expected here */
         nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
         if (nDecompressedSize < 0)
            return (size_t)-1;

         pCurOutBuffer += nDecompressedSize;
         nPreviousBlockSize = nDecompressedSize;
      }
      else {
         /* Copy uncompressed block */
         if (nBlockDataSize > (size_t)(pEndOutBuffer - pCurOutBuffer))
            return (size_t)-1;
         memcpy(pCurOutBuffer, pCurFileData, nBlockDataSize);
         pCurOutBuffer += nBlockDataSize;

         /* A stored block is also the previous block for whatever follows; without this the
          * next compressed block would be given a stale (or empty) back-reference window */
         nPreviousBlockSize = (int)nBlockDataSize;
      }

      pCurFileData += nBlockDataSize;
   }

   *pFormatVersion = nFormatVersion;

   /* Pointer difference converted straight to size_t; going through int would truncate large outputs */
   return (size_t)(pCurOutBuffer - pOutBuffer);
}
|
61
src/expand_inmem.h
Normal file
61
src/expand_inmem.h
Normal file
@ -0,0 +1,61 @@
|
||||
/*
|
||||
* expand_inmem.h - in-memory decompression definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _EXPAND_INMEM_H
|
||||
#define _EXPAND_INMEM_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* Get maximum decompressed size of compressed data
|
||||
*
|
||||
* @param pFileData compressed data
|
||||
* @param nFileSize compressed size in bytes
|
||||
*
|
||||
* @return maximum decompressed size, or -1 for error
|
||||
*/
|
||||
size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size_t nFileSize);
|
||||
|
||||
/**
|
||||
* Decompress data in memory
|
||||
*
|
||||
* @param pFileData compressed data
|
||||
* @param pOutBuffer buffer for decompressed data
|
||||
* @param nFileSize compressed size in bytes
|
||||
* @param nMaxOutBufferSize maximum capacity of decompression buffer
|
||||
* @param pFormatVersion pointer to format version, updated if this function is successful
|
||||
*
|
||||
* @return actual decompressed size, or -1 for error
|
||||
*/
|
||||
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
|
||||
|
||||
#endif /* _EXPAND_INMEM_H */
|
243
src/expand_streaming.c
Normal file
243
src/expand_streaming.c
Normal file
@ -0,0 +1,243 @@
|
||||
/*
|
||||
* expand_streaming.c - streaming decompression implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "expand_streaming.h"
|
||||
#include "format.h"
|
||||
#include "frame.h"
|
||||
#include "lib.h"
|
||||
|
||||
/*-------------- File API -------------- */
|
||||
|
||||
/**
|
||||
* Decompress file
|
||||
*
|
||||
* @param pszInFilename name of input(compressed) file to decompress
|
||||
* @param pszOutFilename name of output(decompressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize) {
|
||||
lzsa_stream_t inStream, outStream;
|
||||
void *pDictionaryData = NULL;
|
||||
int nDictionaryDataSize = 0;
|
||||
lzsa_status_t nStatus;
|
||||
|
||||
if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
|
||||
return LZSA_ERROR_SRC;
|
||||
}
|
||||
|
||||
if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
|
||||
inStream.close(&inStream);
|
||||
return LZSA_ERROR_DST;
|
||||
}
|
||||
|
||||
nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
|
||||
if (nStatus) {
|
||||
outStream.close(&outStream);
|
||||
inStream.close(&inStream);
|
||||
return nStatus;
|
||||
}
|
||||
|
||||
nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize);
|
||||
|
||||
lzsa_dictionary_free(&pDictionaryData);
|
||||
outStream.close(&outStream);
|
||||
inStream.close(&inStream);
|
||||
|
||||
return nStatus;
|
||||
}
|
||||
|
||||
/*-------------- Streaming API -------------- */
|
||||
|
||||
/**
|
||||
* Decompress stream
|
||||
*
|
||||
* @param pInStream input(compressed) stream to decompress
|
||||
* @param pOutStream output(decompressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize) {
|
||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||
unsigned char cFrameData[16];
|
||||
unsigned char *pInBlock;
|
||||
unsigned char *pOutData;
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
const int nHeaderSize = lzsa_get_header_size();
|
||||
|
||||
memset(cFrameData, 0, 16);
|
||||
if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
|
||||
return LZSA_ERROR_SRC;
|
||||
}
|
||||
|
||||
if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
|
||||
return LZSA_ERROR_FORMAT;
|
||||
}
|
||||
|
||||
nCompressedSize += (long long)nHeaderSize;
|
||||
}
|
||||
|
||||
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
|
||||
if (!pInBlock) {
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
|
||||
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
|
||||
if (!pOutData) {
|
||||
free(pInBlock);
|
||||
pInBlock = NULL;
|
||||
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
|
||||
int nDecompressionError = 0;
|
||||
int nPrevDecompressedSize = 0;
|
||||
int nNumBlocks = 0;
|
||||
|
||||
while (!pInStream->eof(pInStream) && !nDecompressionError) {
|
||||
unsigned int nBlockSize = 0;
|
||||
int nIsUncompressed = 0;
|
||||
|
||||
if (nPrevDecompressedSize != 0) {
|
||||
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
|
||||
}
|
||||
else if (nDictionaryDataSize && pDictionaryData) {
|
||||
nPrevDecompressedSize = nDictionaryDataSize;
|
||||
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize);
|
||||
}
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
const int nFrameSize = lzsa_get_frame_size();
|
||||
|
||||
memset(cFrameData, 0, 16);
|
||||
if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
|
||||
if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
|
||||
nDecompressionError = LZSA_ERROR_FORMAT;
|
||||
nBlockSize = 0;
|
||||
}
|
||||
|
||||
nCompressedSize += (long long)nFrameSize;
|
||||
}
|
||||
else {
|
||||
nDecompressionError = LZSA_ERROR_SRC;
|
||||
nBlockSize = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!nNumBlocks)
|
||||
nBlockSize = BLOCK_SIZE;
|
||||
else
|
||||
nBlockSize = 0;
|
||||
}
|
||||
|
||||
if (nBlockSize != 0) {
|
||||
int nDecompressedSize = 0;
|
||||
|
||||
if ((int)nBlockSize > BLOCK_SIZE) {
|
||||
nDecompressionError = LZSA_ERROR_FORMAT;
|
||||
break;
|
||||
}
|
||||
size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
|
||||
if (nFlags & LZSA_FLAG_RAW_BLOCK) {
|
||||
if (nReadBytes > 2)
|
||||
nReadBytes -= 2;
|
||||
else
|
||||
nReadBytes = 0;
|
||||
nBlockSize = (unsigned int)nReadBytes;
|
||||
}
|
||||
|
||||
if (nReadBytes == nBlockSize) {
|
||||
nCompressedSize += (long long)nReadBytes;
|
||||
|
||||
if (nIsUncompressed) {
|
||||
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
|
||||
nDecompressedSize = nBlockSize;
|
||||
}
|
||||
else {
|
||||
unsigned int nBlockOffs = 0;
|
||||
|
||||
nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
|
||||
if (nDecompressedSize < 0) {
|
||||
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (nDecompressedSize != 0) {
|
||||
nOriginalSize += (long long)nDecompressedSize;
|
||||
|
||||
if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
|
||||
nDecompressionError = LZSA_ERROR_DST;
|
||||
nPrevDecompressedSize = nDecompressedSize;
|
||||
nDecompressedSize = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
|
||||
nNumBlocks++;
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
free(pOutData);
|
||||
pOutData = NULL;
|
||||
|
||||
free(pInBlock);
|
||||
pInBlock = NULL;
|
||||
|
||||
*pOriginalSize = nOriginalSize;
|
||||
*pCompressedSize = nCompressedSize;
|
||||
return nDecompressionError;
|
||||
}
|
||||
|
78
src/expand_streaming.h
Normal file
78
src/expand_streaming.h
Normal file
@ -0,0 +1,78 @@
|
||||
/*
|
||||
* expand_streaming.h - streaming decompression definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _EXPAND_STREAMING_H
|
||||
#define _EXPAND_STREAMING_H
|
||||
|
||||
#include "stream.h"
|
||||
|
||||
/* Forward declaration */
|
||||
typedef enum _lzsa_status_t lzsa_status_t;
|
||||
|
||||
/*-------------- File API -------------- */
|
||||
|
||||
/**
|
||||
* Decompress file
|
||||
*
|
||||
* @param pszInFilename name of input(compressed) file to decompress
|
||||
* @param pszOutFilename name of output(decompressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize);
|
||||
|
||||
/*-------------- Streaming API -------------- */
|
||||
|
||||
/**
|
||||
* Decompress stream
|
||||
*
|
||||
* @param pInStream input(compressed) stream to decompress
|
||||
* @param pOutStream output(decompressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize);
|
||||
|
||||
#endif /* _EXPAND_STREAMING_H */
|
@ -38,6 +38,8 @@
|
||||
|
||||
#define MAX_VARLEN 0xffff
|
||||
|
||||
#define BLOCK_SIZE 65536
|
||||
|
||||
#define MIN_MATCH_SIZE_V1 3
|
||||
#define LITERALS_RUN_LEN_V1 7
|
||||
#define MATCH_RUN_LEN_V1 15
|
||||
|
@ -31,6 +31,7 @@
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "frame.h"
|
||||
|
||||
#define LZSA_ID_0 0x7b
|
||||
|
208
src/lib.h
208
src/lib.h
@ -33,11 +33,19 @@
|
||||
#ifndef _LIB_H
|
||||
#define _LIB_H
|
||||
|
||||
#include "divsufsort.h"
|
||||
#include "stream.h"
|
||||
#include "dictionary.h"
|
||||
#include "frame.h"
|
||||
#include "format.h"
|
||||
#include "shrink_context.h"
|
||||
#include "shrink_streaming.h"
|
||||
#include "shrink_inmem.h"
|
||||
#include "expand_context.h"
|
||||
#include "expand_streaming.h"
|
||||
#include "expand_inmem.h"
|
||||
|
||||
/** High level status for compression and decompression */
|
||||
typedef enum {
|
||||
typedef enum _lzsa_status_t {
|
||||
LZSA_OK = 0, /**< Success */
|
||||
LZSA_ERROR_SRC, /**< Error reading input */
|
||||
LZSA_ERROR_DST, /**< Error writing output */
|
||||
@ -58,200 +66,4 @@ typedef enum {
|
||||
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
|
||||
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
|
||||
|
||||
/*-------------- Top level API -------------- */
|
||||
|
||||
/**
|
||||
* Compress file
|
||||
*
|
||||
* @param pszInFilename name of input(source) file to compress
|
||||
* @param pszOutFilename name of output(compressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
||||
|
||||
/**
|
||||
* Decompress file
|
||||
*
|
||||
* @param pszInFilename name of input(compressed) file to decompress
|
||||
* @param pszOutFilename name of output(decompressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize);
|
||||
|
||||
/*-------------- Streaming API -------------- */
|
||||
|
||||
/**
|
||||
* Load dictionary contents
|
||||
*
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
|
||||
|
||||
/**
|
||||
* Free dictionary contents
|
||||
*
|
||||
* @param pDictionaryData pointer to pointer to dictionary contents
|
||||
*/
|
||||
void lzsa_dictionary_free(void **ppDictionaryData);
|
||||
|
||||
/**
|
||||
* Compress stream
|
||||
*
|
||||
* @param pInStream input(source) stream to compress
|
||||
* @param pOutStream output(compressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
||||
|
||||
/**
|
||||
* Decompress stream
|
||||
*
|
||||
* @param pInStream input(compressed) stream to decompress
|
||||
* @param pOutStream output(decompressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
|
||||
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
|
||||
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
||||
long long *pOriginalSize, long long *pCompressedSize);
|
||||
|
||||
/*-------------- Block compression API --------------*/
|
||||
|
||||
#define LCP_BITS 15
|
||||
#define LCP_MAX (1<<(LCP_BITS - 1))
|
||||
#define LCP_SHIFT (32-LCP_BITS)
|
||||
#define LCP_MASK (((1<<LCP_BITS) - 1) << LCP_SHIFT)
|
||||
#define POS_MASK ((1<<LCP_SHIFT) - 1)
|
||||
|
||||
#define NMATCHES_PER_OFFSET 8
|
||||
#define MATCHES_PER_OFFSET_SHIFT 3
|
||||
|
||||
#define LEAVE_ALONE_MATCH_SIZE 1000
|
||||
|
||||
#define LAST_MATCH_OFFSET 4
|
||||
#define LAST_LITERALS 1
|
||||
|
||||
#define MODESWITCH_PENALTY 1
|
||||
|
||||
/** One match */
|
||||
typedef struct _lzsa_match {
|
||||
unsigned short length;
|
||||
unsigned short offset;
|
||||
} lzsa_match;
|
||||
|
||||
/** One rep-match slot (for LZSA2) */
|
||||
typedef struct _lzsa_repmatch_opt {
|
||||
int incoming_offset;
|
||||
short best_slot_for_incoming;
|
||||
short expected_repmatch;
|
||||
} lzsa_repmatch_opt;
|
||||
|
||||
/**
 * Compression context: working buffers and settings shared by the match finder and
 * the block optimizers.
 *
 * NOTE(review): the tag and typedef are spelled "lsza" here while other declarations
 * in the project use "lzsa_compressor" -- confirm which spelling is current.
 */
|
||||
typedef struct _lsza_compressor {
|
||||
/* Suffix sorter state (libdivsufsort, judging by the type name) */
divsufsort_ctx_t divsufsort_context;
|
||||
/* Read by lzsa_build_suffix_array() and lzsa_find_matches_at() */
unsigned int *intervals;
|
||||
/* Read by lzsa_find_matches_at(); the LZSA1 optimizer reuses it as its cost array */
unsigned int *pos_data;
|
||||
/* NOTE(review): not referenced in the code visible here -- confirm its role */
unsigned int *open_intervals;
|
||||
/* Match candidates, indexed as (input offset << MATCHES_PER_OFFSET_SHIFT) */
lzsa_match *match;
|
||||
/* NOTE(review): not referenced in the code visible here -- presumably the chosen match per offset */
lzsa_match *best_match;
|
||||
/* NOTE(review): slot_cost and repmatch_opt are presumably LZSA2 rep-match state -- confirm */
int *slot_cost;
|
||||
lzsa_repmatch_opt *repmatch_opt;
|
||||
/* Minimum match length accepted by the optimizer */
int min_match_size;
|
||||
/* presumably the LZSA format version (1-2) given at init -- TODO confirm */
int format_version;
|
||||
/* Compression flags; the optimizer tests LZSA_FLAG_FAVOR_RATIO */
int flags;
|
||||
/* presumably the count returned by lzsa_compressor_get_command_count() -- TODO confirm */
int num_commands;
|
||||
} lsza_compressor;
|
||||
|
||||
/**
|
||||
* Initialize compression context
|
||||
*
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||
* @param nFormatVersion version of format to use (1-2)
* @param nFlags compression flags
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
|
||||
|
||||
/**
|
||||
* Clean up compression context and free up any associated resources
|
||||
*
|
||||
* @param pCompressor compression context to clean up
|
||||
*/
|
||||
void lzsa_compressor_destroy(lsza_compressor *pCompressor);
|
||||
|
||||
/**
|
||||
* Compress one block of data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
|
||||
* @param nInDataSize number of input bytes to compress
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||
|
||||
/**
|
||||
* Get the number of compression commands issued in compressed data blocks
|
||||
*
|
||||
* @param pCompressor compression context
*
* @return number of commands
|
||||
*/
|
||||
int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
|
||||
|
||||
/**
|
||||
* Decompress one data block
|
||||
*
|
||||
* @param nFormatVersion version of format to use (1-2)
* @param pInBlock pointer to compressed data
|
||||
* @param nBlockSize size of compressed data, in bytes
|
||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||
*
|
||||
* @return size of decompressed data in bytes, or -1 for error
|
||||
*/
|
||||
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
|
||||
|
||||
#endif /* _LIB_H */
|
||||
|
402
src/lzsa.c
402
src/lzsa.c
@ -41,7 +41,6 @@
|
||||
#include <sys/time.h>
|
||||
#endif
|
||||
#include "lib.h"
|
||||
#include "inmem.h"
|
||||
|
||||
#define OPT_VERBOSE 1
|
||||
#define OPT_RAW 2
|
||||
@ -115,7 +114,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
|
||||
nStartTime = do_get_time();
|
||||
}
|
||||
|
||||
nStatus = lsza_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
|
||||
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
|
||||
|
||||
if ((nOptions & OPT_VERBOSE)) {
|
||||
nEndTime = do_get_time();
|
||||
@ -345,7 +344,369 @@ static int do_compare(const char *pszInFilename, const char *pszOutFilename, con
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
|
||||
/**
 * Fill a buffer with pseudo-random, compressible data: runs of random literals
 * interleaved with copies ("matches") of data generated earlier in the same buffer.
 *
 * The output depends only on the arguments: every byte is either a fresh literal or
 * a copy of a byte already written by this call, so whatever the buffer held before
 * the call never leaks into the result. Reseeds the C library generator with srand().
 *
 * @param pBuffer buffer to fill
 * @param nBufferSize number of bytes to generate
 * @param nMinMatchSize minimum length of a generated match, before clamping to the space available
 * @param nSeed seed passed to srand()
 * @param nNumLiteralValues literals are drawn from 0..nNumLiteralValues-1 (values below 1 are treated as 1)
 * @param fMatchProbability probability (0..1) that the next run is a match rather than literals
 */
static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) {
   const int nMatchProbability = (int)(fMatchProbability * 1023.0f);
   size_t nIndex = 0;

   /* rand() % 0 would divide by zero */
   if (nNumLiteralValues < 1)
      nNumLiteralValues = 1;

   srand(nSeed);

   if (nIndex >= nBufferSize) return;

   /* Always start with one literal so that matches have something to copy from */
   pBuffer[nIndex++] = rand() % nNumLiteralValues;

   while (nIndex < nBufferSize) {
      if ((rand() & 1023) >= nMatchProbability) {
         /* Run of 0..127 literals, clamped to the space left */
         size_t nLiteralCount = rand() & 127;
         if (nLiteralCount > (nBufferSize - nIndex))
            nLiteralCount = nBufferSize - nIndex;

         while (nLiteralCount--)
            pBuffer[nIndex++] = rand() % nNumLiteralValues;
      }
      else {
         /* Match: clamp the length to the space left and to the amount of data generated so far */
         size_t nMatchLength = nMinMatchSize + (rand() & 1023);
         size_t nMatchOffset;

         if (nMatchLength > (nBufferSize - nIndex))
            nMatchLength = nBufferSize - nIndex;
         if (nMatchLength > nIndex)
            nMatchLength = nIndex;

         /* The offset is at least 1 and at most nIndex, so the source byte has always been
          * written already. An offset of 0 would copy each byte onto itself and leave
          * whatever the buffer previously contained. */
         if (nMatchLength < nIndex)
            nMatchOffset = 1 + (rand() % (nIndex - nMatchLength));
         else
            nMatchOffset = nIndex;

         while (nMatchLength--) {
            pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset];
            nIndex++;
         }
      }
   }
}
|
||||
|
||||
/**
 * Corrupt a buffer in place by inverting (XOR with 0xff) pseudo-randomly chosen bytes.
 * Reseeds the C library generator with srand() and draws one rand() value per byte.
 *
 * @param pBuffer buffer to corrupt
 * @param nBufferSize number of bytes in the buffer
 * @param nSeed seed passed to srand()
 * @param fXorProbability probability (0..1) that any given byte is inverted
 */
static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) {
   const int nThreshold = (int)(fXorProbability * 1023.0f);
   size_t nPos;

   srand(nSeed);

   for (nPos = 0; nPos < nBufferSize; nPos++) {
      const int nDraw = rand() & 1023;

      if (nDraw < nThreshold)
         pBuffer[nPos] ^= 0xff;
   }
}
|
||||
|
||||
static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
|
||||
unsigned char *pGeneratedData;
|
||||
unsigned char *pCompressedData;
|
||||
unsigned char *pTmpCompressedData;
|
||||
unsigned char *pTmpDecompressedData;
|
||||
size_t nGeneratedDataSize;
|
||||
size_t nMaxCompressedDataSize;
|
||||
unsigned int nSeed = 123;
|
||||
int nFlags;
|
||||
int i;
|
||||
|
||||
nFlags = 0;
|
||||
if (nOptions & OPT_FAVOR_RATIO)
|
||||
nFlags |= LZSA_FLAG_FAVOR_RATIO;
|
||||
if (nOptions & OPT_RAW)
|
||||
nFlags |= LZSA_FLAG_RAW_BLOCK;
|
||||
|
||||
pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
|
||||
if (!pGeneratedData) {
|
||||
fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
|
||||
return 100;
|
||||
}
|
||||
|
||||
nMaxCompressedDataSize = lzsa_get_max_compressed_size_inmem(4 * BLOCK_SIZE);
|
||||
pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
|
||||
if (!pCompressedData) {
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
|
||||
return 100;
|
||||
}
|
||||
|
||||
pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize);
|
||||
if (!pTmpCompressedData) {
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize);
|
||||
return 100;
|
||||
}
|
||||
|
||||
pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE);
|
||||
if (!pTmpDecompressedData) {
|
||||
free(pTmpCompressedData);
|
||||
pTmpCompressedData = NULL;
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE);
|
||||
return 100;
|
||||
}
|
||||
|
||||
memset(pGeneratedData, 0, 4 * BLOCK_SIZE);
|
||||
memset(pCompressedData, 0, nMaxCompressedDataSize);
|
||||
memset(pTmpCompressedData, 0, nMaxCompressedDataSize);
|
||||
|
||||
/* Test compressing with a too small buffer to do anything, expect to fail cleanly */
|
||||
for (i = 0; i < 12; i++) {
|
||||
generate_compressible_data(pGeneratedData, i, nMinMatchSize, nSeed, 256, 0.5f);
|
||||
lzsa_compress_inmem(pGeneratedData, pCompressedData, i, i, nFlags, nMinMatchSize, nFormatVersion);
|
||||
}
|
||||
|
||||
size_t nDataSizeStep = 128;
|
||||
float fProbabilitySizeStep = 0.0005f;
|
||||
|
||||
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= (4 * BLOCK_SIZE); nGeneratedDataSize += nDataSizeStep) {
|
||||
float fMatchProbability;
|
||||
|
||||
fprintf(stdout, "size %zd", nGeneratedDataSize);
|
||||
for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) {
|
||||
int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 };
|
||||
float fXorProbability;
|
||||
|
||||
fputc('.', stdout);
|
||||
fflush(stdout);
|
||||
|
||||
for (i = 0; i < 12; i++) {
|
||||
/* Generate data to compress */
|
||||
generate_compressible_data(pGeneratedData, nGeneratedDataSize, nMinMatchSize, nSeed, nNumLiteralValues[i], fMatchProbability);
|
||||
|
||||
/* Try to compress it, expected to succeed */
|
||||
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
|
||||
nFlags, nMinMatchSize, nFormatVersion);
|
||||
if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
|
||||
free(pTmpDecompressedData);
|
||||
pTmpDecompressedData = NULL;
|
||||
free(pTmpCompressedData);
|
||||
pTmpCompressedData = NULL;
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
|
||||
return 100;
|
||||
}
|
||||
|
||||
/* Try to decompress it, expected to succeed */
|
||||
size_t nActualDecompressedSize;
|
||||
int nDecFormatVersion = 0;
|
||||
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
|
||||
if (nActualDecompressedSize == -1) {
|
||||
free(pTmpDecompressedData);
|
||||
pTmpDecompressedData = NULL;
|
||||
free(pTmpCompressedData);
|
||||
pTmpCompressedData = NULL;
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
|
||||
return 100;
|
||||
}
|
||||
|
||||
if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) {
|
||||
free(pTmpDecompressedData);
|
||||
pTmpDecompressedData = NULL;
|
||||
free(pTmpCompressedData);
|
||||
pTmpCompressedData = NULL;
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]);
|
||||
return 100;
|
||||
}
|
||||
|
||||
/* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */
|
||||
for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) {
|
||||
memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize);
|
||||
xor_data(pTmpCompressedData + lzsa_get_header_size() + lzsa_get_frame_size(), nActualCompressedSize - lzsa_get_header_size() - lzsa_get_frame_size() - lzsa_get_frame_size() /* footer */, nSeed, fXorProbability);
|
||||
lzsa_decompress_inmem(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, &nDecFormatVersion);
|
||||
}
|
||||
}
|
||||
|
||||
nSeed++;
|
||||
}
|
||||
|
||||
fputc(10, stdout);
|
||||
fflush(stdout);
|
||||
|
||||
nDataSizeStep <<= 1;
|
||||
if (nDataSizeStep > (128 * 4096))
|
||||
nDataSizeStep = 128 * 4096;
|
||||
fProbabilitySizeStep *= 1.25;
|
||||
if (fProbabilitySizeStep > (0.0005f * 4096))
|
||||
fProbabilitySizeStep = 0.0005f * 4096;
|
||||
}
|
||||
|
||||
free(pTmpDecompressedData);
|
||||
pTmpDecompressedData = NULL;
|
||||
|
||||
free(pTmpCompressedData);
|
||||
pTmpCompressedData = NULL;
|
||||
|
||||
free(pCompressedData);
|
||||
pCompressedData = NULL;
|
||||
|
||||
free(pGeneratedData);
|
||||
pGeneratedData = NULL;
|
||||
|
||||
fprintf(stdout, "All tests passed.\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) {
|
||||
size_t nFileSize, nMaxCompressedSize;
|
||||
unsigned char *pFileData;
|
||||
unsigned char *pCompressedData;
|
||||
int nFlags;
|
||||
int i;
|
||||
|
||||
nFlags = 0;
|
||||
if (nOptions & OPT_FAVOR_RATIO)
|
||||
nFlags |= LZSA_FLAG_FAVOR_RATIO;
|
||||
if (nOptions & OPT_RAW)
|
||||
nFlags |= LZSA_FLAG_RAW_BLOCK;
|
||||
|
||||
if (pszDictionaryFilename) {
|
||||
fprintf(stderr, "in-memory benchmarking does not support dictionaries\n");
|
||||
return 100;
|
||||
}
|
||||
|
||||
/* Read the whole original file in memory */
|
||||
|
||||
FILE *f_in = fopen(pszInFilename, "rb");
|
||||
if (!f_in) {
|
||||
fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
|
||||
return 100;
|
||||
}
|
||||
|
||||
fseek(f_in, 0, SEEK_END);
|
||||
nFileSize = (size_t)ftell(f_in);
|
||||
fseek(f_in, 0, SEEK_SET);
|
||||
|
||||
pFileData = (unsigned char*)malloc(nFileSize);
|
||||
if (!pFileData) {
|
||||
fclose(f_in);
|
||||
fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize);
|
||||
return 100;
|
||||
}
|
||||
|
||||
if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) {
|
||||
free(pFileData);
|
||||
fclose(f_in);
|
||||
fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename);
|
||||
return 100;
|
||||
}
|
||||
|
||||
fclose(f_in);
|
||||
|
||||
/* Allocate max compressed size */
|
||||
|
||||
nMaxCompressedSize = lzsa_get_max_compressed_size_inmem(nFileSize);
|
||||
|
||||
pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048);
|
||||
if (!pCompressedData) {
|
||||
free(pFileData);
|
||||
fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize);
|
||||
return 100;
|
||||
}
|
||||
|
||||
memset(pCompressedData + 1024, 0, nMaxCompressedSize);
|
||||
|
||||
long long nBestCompTime = -1;
|
||||
|
||||
size_t nActualCompressedSize = 0;
|
||||
size_t nRightGuardPos = nMaxCompressedSize;
|
||||
|
||||
for (i = 0; i < 5; i++) {
|
||||
unsigned char nGuard = 0x33 + i;
|
||||
int j;
|
||||
|
||||
/* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */
|
||||
memset(pCompressedData, nGuard, 1024);
|
||||
memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024);
|
||||
|
||||
long long t0 = do_get_time();
|
||||
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
|
||||
long long t1 = do_get_time();
|
||||
if (nActualCompressedSize == -1) {
|
||||
free(pCompressedData);
|
||||
free(pFileData);
|
||||
fprintf(stderr, "compression error\n");
|
||||
return 100;
|
||||
}
|
||||
|
||||
long long nCurDecTime = t1 - t0;
|
||||
if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
|
||||
nBestCompTime = nCurDecTime;
|
||||
|
||||
/* Check guard bytes before the output buffer */
|
||||
for (j = 0; j < 1024; j++) {
|
||||
if (pCompressedData[j] != nGuard) {
|
||||
free(pCompressedData);
|
||||
free(pFileData);
|
||||
fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024);
|
||||
return 100;
|
||||
}
|
||||
}
|
||||
|
||||
/* Check guard bytes after the output buffer */
|
||||
for (j = 0; j < 1024; j++) {
|
||||
if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) {
|
||||
free(pCompressedData);
|
||||
free(pFileData);
|
||||
fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j);
|
||||
return 100;
|
||||
}
|
||||
}
|
||||
|
||||
nRightGuardPos = nActualCompressedSize;
|
||||
}
|
||||
|
||||
if (pszOutFilename) {
|
||||
FILE *f_out;
|
||||
|
||||
/* Write whole compressed file out */
|
||||
|
||||
f_out = fopen(pszOutFilename, "wb");
|
||||
if (f_out) {
|
||||
fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out);
|
||||
fclose(f_out);
|
||||
}
|
||||
}
|
||||
|
||||
free(pCompressedData);
|
||||
free(pFileData);
|
||||
|
||||
fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize);
|
||||
fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0));
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
|
||||
size_t nFileSize, nMaxDecompressedSize;
|
||||
unsigned char *pFileData;
|
||||
unsigned char *pDecompressedData;
|
||||
@ -389,7 +750,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
|
||||
if (nOptions & OPT_RAW)
|
||||
nMaxDecompressedSize = 65536;
|
||||
else
|
||||
nMaxDecompressedSize = lzsa_inmem_get_max_decompressed_size(pFileData, nFileSize);
|
||||
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
|
||||
if (nMaxDecompressedSize == -1) {
|
||||
free(pFileData);
|
||||
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
|
||||
@ -413,7 +774,7 @@ static int do_benchmark(const char *pszInFilename, const char *pszOutFilename, c
|
||||
if (nOptions & OPT_RAW)
|
||||
nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
|
||||
else
|
||||
nActualDecompressedSize = lzsa_inmem_decompress_stream(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
|
||||
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
|
||||
long long t1 = do_get_time();
|
||||
if (nActualDecompressedSize == -1) {
|
||||
free(pDecompressedData);
|
||||
@ -490,7 +851,15 @@ int main(int argc, char **argv) {
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-bench")) {
|
||||
else if (!strcmp(argv[i], "-cbench")) {
|
||||
if (!bCommandDefined) {
|
||||
bCommandDefined = true;
|
||||
cCommand = 'B';
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-dbench")) {
|
||||
if (!bCommandDefined) {
|
||||
bCommandDefined = true;
|
||||
cCommand = 'b';
|
||||
@ -498,6 +867,14 @@ int main(int argc, char **argv) {
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-test")) {
|
||||
if (!bCommandDefined) {
|
||||
bCommandDefined = true;
|
||||
cCommand = 't';
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-D")) {
|
||||
if (!pszDictionaryFilename && (i + 1) < argc) {
|
||||
pszDictionaryFilename = argv[i + 1];
|
||||
@ -616,12 +993,18 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!bArgsError && cCommand == 't') {
|
||||
return do_self_test(nOptions, nMinMatchSize, nFormatVersion);
|
||||
}
|
||||
|
||||
if (bArgsError || !pszInFilename || !pszOutFilename) {
|
||||
fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n");
|
||||
fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] <infile> <outfile>\n", argv[0]);
|
||||
fprintf(stderr, " -c: check resulting stream after compressing\n");
|
||||
fprintf(stderr, " -d: decompress (default: compress)\n");
|
||||
fprintf(stderr, " -bench: benchmary in-memory decompression\n");
|
||||
fprintf(stderr, " -cbench: benchmary in-memory compression\n");
|
||||
fprintf(stderr, " -dbench: benchmary in-memory decompression\n");
|
||||
fprintf(stderr, " -test: run automated self-tests\n");
|
||||
fprintf(stderr, " -v: be verbose\n");
|
||||
fprintf(stderr, " -f <value>: LZSA compression format (1-2)\n");
|
||||
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
|
||||
@ -643,8 +1026,11 @@ int main(int argc, char **argv) {
|
||||
else if (cCommand == 'd') {
|
||||
return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
|
||||
}
|
||||
else if (cCommand == 'B') {
|
||||
return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion);
|
||||
}
|
||||
else if (cCommand == 'b') {
|
||||
return do_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
|
||||
return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion);
|
||||
}
|
||||
else {
|
||||
return 100;
|
||||
|
@ -29,7 +29,6 @@
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "matchfinder.h"
|
||||
@ -45,7 +44,7 @@
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
|
||||
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize) {
|
||||
unsigned int *intervals = pCompressor->intervals;
|
||||
|
||||
/* Build suffix array from input data */
|
||||
@ -170,7 +169,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
|
||||
*
|
||||
* @return number of matches
|
||||
*/
|
||||
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
|
||||
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) {
|
||||
unsigned int *intervals = pCompressor->intervals;
|
||||
unsigned int *pos_data = pCompressor->pos_data;
|
||||
unsigned int ref;
|
||||
@ -248,7 +247,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
* @param nStartOffset current offset in input window (typically 0)
|
||||
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
|
||||
*/
|
||||
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
lzsa_match match;
|
||||
int i;
|
||||
|
||||
@ -267,7 +266,7 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
|
||||
*/
|
||||
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
lzsa_match *pMatch = pCompressor->match + (nStartOffset << MATCHES_PER_OFFSET_SHIFT);
|
||||
int i;
|
||||
|
||||
|
@ -35,7 +35,7 @@
|
||||
|
||||
/* Forward declarations */
|
||||
typedef struct _lzsa_match lzsa_match;
|
||||
typedef struct _lsza_compressor lsza_compressor;
|
||||
typedef struct _lzsa_compressor lzsa_compressor;
|
||||
|
||||
/**
|
||||
* Parse input data, build suffix array and overlaid data structures to speed up match finding
|
||||
@ -46,7 +46,7 @@ typedef struct _lsza_compressor lsza_compressor;
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
|
||||
int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nInWindowSize);
|
||||
|
||||
/**
|
||||
* Find matches at the specified offset in the input window
|
||||
@ -58,7 +58,7 @@ int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned char *p
|
||||
*
|
||||
* @return number of matches
|
||||
*/
|
||||
int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
|
||||
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches);
|
||||
|
||||
/**
|
||||
* Skip previously compressed bytes
|
||||
@ -67,7 +67,7 @@ int lzsa_find_matches_at(lsza_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
* @param nStartOffset current offset in input window (typically 0)
|
||||
* @param nEndOffset offset to skip to in input window (typically the number of previously compressed bytes)
|
||||
*/
|
||||
void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
||||
void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
||||
|
||||
/**
|
||||
* Find all matches for the data to be compressed. Up to NMATCHES_PER_OFFSET matches are stored for each offset, for
|
||||
@ -77,6 +77,6 @@ void lzsa_skip_matches(lsza_compressor *pCompressor, const int nStartOffset, con
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
|
||||
*/
|
||||
void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
||||
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
||||
|
||||
#endif /* _MATCHFINDER_H */
|
||||
|
459
src/shrink_block_v1.c
Normal file
459
src/shrink_block_v1.c
Normal file
@ -0,0 +1,459 @@
|
||||
/*
|
||||
* shrink_block_v1.c - LZSA1 block compressor implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "lib.h"
|
||||
#include "shrink_block_v1.h"
|
||||
#include "format.h"
|
||||
|
||||
/**
|
||||
* Get the number of extra bits required to represent a literals length
|
||||
*
|
||||
* @param nLength literals length
|
||||
*
|
||||
* @return number of extra bits required
|
||||
*/
|
||||
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
|
||||
if (nLength < LITERALS_RUN_LEN_V1) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
if (nLength < 256)
|
||||
return 8;
|
||||
else {
|
||||
if (nLength < 512)
|
||||
return 16;
|
||||
else
|
||||
return 24;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
|
||||
* room to write the bytes.
|
||||
*
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nOutOffset current write index into output buffer
|
||||
* @param nLength literals length
|
||||
*/
|
||||
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
|
||||
if (nLength >= LITERALS_RUN_LEN_V1) {
|
||||
if (nLength < 256)
|
||||
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
|
||||
else {
|
||||
if (nLength < 512) {
|
||||
pOutData[nOutOffset++] = 250;
|
||||
pOutData[nOutOffset++] = nLength - 256;
|
||||
}
|
||||
else {
|
||||
pOutData[nOutOffset++] = 249;
|
||||
pOutData[nOutOffset++] = nLength & 0xff;
|
||||
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of extra bits required to represent an encoded match length
|
||||
*
|
||||
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
|
||||
*
|
||||
* @return number of extra bits required
|
||||
*/
|
||||
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
|
||||
if (nLength < MATCH_RUN_LEN_V1) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
|
||||
return 8;
|
||||
else {
|
||||
if ((nLength + MIN_MATCH_SIZE_V1) < 512)
|
||||
return 16;
|
||||
else
|
||||
return 24;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
|
||||
* room to write the bytes.
|
||||
*
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nOutOffset current write index into output buffer
|
||||
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
|
||||
*/
|
||||
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
|
||||
if (nLength >= MATCH_RUN_LEN_V1) {
|
||||
if ((nLength + MIN_MATCH_SIZE_V1) < 256)
|
||||
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
|
||||
else {
|
||||
if ((nLength + MIN_MATCH_SIZE_V1) < 512) {
|
||||
pOutData[nOutOffset++] = 239;
|
||||
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V1 - 256;
|
||||
}
|
||||
else {
|
||||
pOutData[nOutOffset++] = 238;
|
||||
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V1) & 0xff;
|
||||
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V1) >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
|
||||
*/
|
||||
static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
||||
int nLastLiteralsOffset;
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
|
||||
int i;
|
||||
|
||||
cost[nEndOffset - 1] = 8;
|
||||
nLastLiteralsOffset = nEndOffset;
|
||||
|
||||
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
|
||||
int nBestCost, nBestMatchLen, nBestMatchOffset;
|
||||
|
||||
int nLiteralsLen = nLastLiteralsOffset - i;
|
||||
nBestCost = 8 + cost[i + 1];
|
||||
if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
|
||||
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
|
||||
* The cost automatically accumulates down the chain. */
|
||||
nBestCost += 8;
|
||||
}
|
||||
if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
|
||||
nBestCost += MODESWITCH_PENALTY;
|
||||
nBestMatchLen = 0;
|
||||
nBestMatchOffset = 0;
|
||||
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
int m;
|
||||
|
||||
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
|
||||
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
|
||||
|
||||
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
|
||||
int nCurCost;
|
||||
int nMatchLen = pMatch[m].length;
|
||||
|
||||
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
||||
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
|
||||
nCurCost += cost[i + nMatchLen];
|
||||
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = nMatchLen;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nMatchLen = pMatch[m].length;
|
||||
int k, nMatchRunLen;
|
||||
|
||||
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
||||
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
||||
|
||||
nMatchRunLen = nMatchLen;
|
||||
if (nMatchRunLen > MATCH_RUN_LEN_V1)
|
||||
nMatchRunLen = MATCH_RUN_LEN_V1;
|
||||
|
||||
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
|
||||
int nCurCost;
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
|
||||
nCurCost += cost[i + k];
|
||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
}
|
||||
}
|
||||
|
||||
for (; k <= nMatchLen; k++) {
|
||||
int nCurCost;
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
|
||||
nCurCost += cost[i + k];
|
||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
|
||||
nLastLiteralsOffset = i;
|
||||
|
||||
cost[i] = nBestCost;
|
||||
pMatch->length = nBestMatchLen;
|
||||
pMatch->offset = nBestMatchOffset;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
|
||||
* impacting the compression ratio
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
*
|
||||
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
|
||||
*/
|
||||
static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
int i;
|
||||
int nNumLiterals = 0;
|
||||
int nDidReduce = 0;
|
||||
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
|
||||
int nMatchLen = pMatch->length;
|
||||
int nReduce = 0;
|
||||
|
||||
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
|
||||
int nMatchOffset = pMatch->offset;
|
||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
|
||||
|
||||
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
|
||||
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
|
||||
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
|
||||
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
|
||||
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
|
||||
* match command by literals, the output size will not increase and it will remove one command. */
|
||||
nReduce = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nCurIndex = i + nMatchLen;
|
||||
int nNextNumLiterals = 0;
|
||||
|
||||
do {
|
||||
nCurIndex++;
|
||||
nNextNumLiterals++;
|
||||
} while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
|
||||
|
||||
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
|
||||
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
|
||||
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
|
||||
nReduce = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nReduce) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < nMatchLen; j++) {
|
||||
pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
|
||||
}
|
||||
nNumLiterals += nMatchLen;
|
||||
i += nMatchLen;
|
||||
|
||||
nDidReduce = 1;
|
||||
}
|
||||
else {
|
||||
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
|
||||
pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
|
||||
(nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
|
||||
/* Join */
|
||||
|
||||
pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
|
||||
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
|
||||
pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
nNumLiterals = 0;
|
||||
i += nMatchLen;
|
||||
}
|
||||
}
|
||||
else {
|
||||
nNumLiterals++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return nDidReduce;
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit block of compressed data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
int i;
|
||||
int nNumLiterals = 0;
|
||||
int nInFirstLiteralOffset = 0;
|
||||
int nOutOffset = 0;
|
||||
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
|
||||
int nMatchOffset = pMatch->offset;
|
||||
int nMatchLen = pMatch->length;
|
||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
|
||||
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
|
||||
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
|
||||
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
|
||||
|
||||
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
|
||||
return -1;
|
||||
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
|
||||
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
||||
|
||||
if (nNumLiterals != 0) {
|
||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||
nOutOffset += nNumLiterals;
|
||||
nNumLiterals = 0;
|
||||
}
|
||||
|
||||
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
||||
if (nTokenLongOffset) {
|
||||
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
||||
}
|
||||
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
|
||||
i += nMatchLen;
|
||||
|
||||
pCompressor->num_commands++;
|
||||
}
|
||||
else {
|
||||
if (nNumLiterals == 0)
|
||||
nInFirstLiteralOffset = i;
|
||||
nNumLiterals++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
|
||||
|
||||
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
|
||||
else
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
|
||||
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
||||
|
||||
if (nNumLiterals != 0) {
|
||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||
nOutOffset += nNumLiterals;
|
||||
nNumLiterals = 0;
|
||||
}
|
||||
|
||||
pCompressor->num_commands++;
|
||||
}
|
||||
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||
/* Emit EOD marker for raw block */
|
||||
|
||||
if ((nOutOffset + 4) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = 0;
|
||||
pOutData[nOutOffset++] = 238;
|
||||
pOutData[nOutOffset++] = 0;
|
||||
pOutData[nOutOffset++] = 0;
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
||||
|
||||
int nDidReduce;
|
||||
int nPasses = 0;
|
||||
do {
|
||||
nDidReduce = lzsa_optimize_command_count_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
||||
nPasses++;
|
||||
} while (nDidReduce && nPasses < 20);
|
||||
|
||||
return lzsa_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
|
||||
}
|
53
src/shrink_block_v1.h
Normal file
53
src/shrink_block_v1.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
 * shrink_block_v1.h - LZSA1 block compressor definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _SHRINK_BLOCK_V1_H
|
||||
#define _SHRINK_BLOCK_V1_H
|
||||
|
||||
/* Forward declarations */
|
||||
typedef struct _lzsa_compressor lzsa_compressor;
|
||||
|
||||
/**
|
||||
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
 * @param nPreviousBlockSize offset in the input window at which the data to compress starts (typically the number of previously compressed bytes)
 * @param nInDataSize number of bytes to compress, starting at nPreviousBlockSize in the input window
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||
|
||||
#endif /* _SHRINK_BLOCK_V1_H */
|
727
src/shrink_block_v2.c
Normal file
727
src/shrink_block_v2.c
Normal file
@ -0,0 +1,727 @@
|
||||
/*
|
||||
 * shrink_block_v2.c - LZSA2 block compressor implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "lib.h"
|
||||
#include "shrink_block_v2.h"
|
||||
#include "format.h"
|
||||
|
||||
/**
 * Write 4-bit nibble to output (compressed) buffer. Nibbles are packed two to a byte: when no byte is
 * currently being filled, a new zeroed byte is reserved at the write index; each nibble is then shifted
 * into the low 4 bits of that byte, so the first nibble written ends up in the high 4 bits.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 * @param nCurNibbleOffset write index into output buffer, of current byte being filled with nibbles (-1 if none)
 * @param nCurFreeNibbles current number of free nibbles in byte
 * @param nNibbleValue value to write (0..15)
 *
 * @return updated write index into output buffer, or -1 if nOutOffset is negative or a new byte is needed
 *         and the output buffer is full
 */
static int lzsa_write_nibble_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nNibbleValue) {
   unsigned char *pNibbleByte;

   /* Propagate an earlier failure */
   if (nOutOffset < 0)
      return -1;

   if (*nCurNibbleOffset == -1) {
      /* No byte is being filled: reserve a fresh one */
      if (nOutOffset >= nMaxOutDataSize)
         return -1;

      *nCurNibbleOffset = nOutOffset;
      *nCurFreeNibbles = 2;
      pOutData[nOutOffset] = 0;
      nOutOffset++;
   }

   pNibbleByte = pOutData + *nCurNibbleOffset;
   *pNibbleByte = (unsigned char)((*pNibbleByte << 4) | (nNibbleValue & 0x0f));

   /* Once both nibbles are used, the next write starts a new byte */
   if (--(*nCurFreeNibbles) == 0)
      *nCurNibbleOffset = -1;

   return nOutOffset;
}
|
||||
|
||||
/**
|
||||
* Get the number of extra bits required to represent a literals length
|
||||
*
|
||||
* @param nLength literals length
|
||||
*
|
||||
* @return number of extra bits required
|
||||
*/
|
||||
static inline int lzsa_get_literals_varlen_size_v2(const int nLength) {
|
||||
if (nLength < LITERALS_RUN_LEN_V2) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
|
||||
return 4;
|
||||
}
|
||||
else {
|
||||
if (nLength < 256)
|
||||
return 4+8;
|
||||
else {
|
||||
return 4+24;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write extra literals length bytes to output (compressed) buffer. The caller must first check that there is enough
|
||||
* room to write the bytes.
|
||||
*
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nOutOffset current write index into output buffer
|
||||
* @param nLength literals length
|
||||
*/
|
||||
static inline int lzsa_write_literals_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
|
||||
if (nLength >= LITERALS_RUN_LEN_V2) {
|
||||
if (nLength < (LITERALS_RUN_LEN_V2 + 15)) {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - LITERALS_RUN_LEN_V2);
|
||||
}
|
||||
else {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
if (nLength < 256)
|
||||
pOutData[nOutOffset++] = nLength - 18;
|
||||
else {
|
||||
pOutData[nOutOffset++] = 239;
|
||||
pOutData[nOutOffset++] = nLength & 0xff;
|
||||
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of extra bits required to represent an encoded match length
|
||||
*
|
||||
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
|
||||
*
|
||||
* @return number of extra bits required
|
||||
*/
|
||||
static inline int lzsa_get_match_varlen_size_v2(const int nLength) {
|
||||
if (nLength < MATCH_RUN_LEN_V2) {
|
||||
return 0;
|
||||
}
|
||||
else {
|
||||
if (nLength < (MATCH_RUN_LEN_V2 + 15))
|
||||
return 4;
|
||||
else {
|
||||
if ((nLength + MIN_MATCH_SIZE_V2) < 256)
|
||||
return 4+8;
|
||||
else {
|
||||
return 4 + 24;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Write extra encoded match length bytes to output (compressed) buffer. The caller must first check that there is enough
|
||||
* room to write the bytes.
|
||||
*
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nOutOffset current write index into output buffer
|
||||
* @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V2)
|
||||
*/
|
||||
static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOffset, const int nMaxOutDataSize, int *nCurNibbleOffset, int *nCurFreeNibbles, int nLength) {
|
||||
if (nLength >= MATCH_RUN_LEN_V2) {
|
||||
if (nLength < (MATCH_RUN_LEN_V2 + 15)) {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, nLength - MATCH_RUN_LEN_V2);
|
||||
}
|
||||
else {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, nCurNibbleOffset, nCurFreeNibbles, 15);
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
if ((nLength + MIN_MATCH_SIZE_V2) < 256)
|
||||
pOutData[nOutOffset++] = nLength + MIN_MATCH_SIZE_V2 - 24;
|
||||
else {
|
||||
pOutData[nOutOffset++] = 233;
|
||||
pOutData[nOutOffset++] = (nLength + MIN_MATCH_SIZE_V2) & 0xff;
|
||||
pOutData[nOutOffset++] = ((nLength + MIN_MATCH_SIZE_V2) >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to pick optimal matches, so as to produce the smallest possible output that decompresses to the same input
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
*/
|
||||
static void lzsa_optimize_matches_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
||||
int *prev_match = (int*)pCompressor->intervals; /* Reuse */
|
||||
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
|
||||
lzsa_match *pBestMatch = pCompressor->best_match;
|
||||
int nLastLiteralsOffset;
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
|
||||
int i;
|
||||
|
||||
cost[nEndOffset - 1] = 8;
|
||||
prev_match[nEndOffset - 1] = nEndOffset;
|
||||
nLastLiteralsOffset = nEndOffset;
|
||||
|
||||
pCompressor->best_match[nEndOffset - 1].length = 0;
|
||||
pCompressor->best_match[nEndOffset - 1].offset = 0;
|
||||
|
||||
repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
|
||||
repmatch_opt[nEndOffset - 1].incoming_offset = -1;
|
||||
repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
|
||||
|
||||
for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
|
||||
int nLiteralsCost;
|
||||
|
||||
int nLiteralsLen = nLastLiteralsOffset - i;
|
||||
nLiteralsCost = 8 + cost[i + 1];
|
||||
|
||||
/* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
|
||||
* The cost automatically accumulates down the chain. */
|
||||
if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
|
||||
nLiteralsCost += 4;
|
||||
}
|
||||
else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
|
||||
nLiteralsCost += 8;
|
||||
}
|
||||
else if (nLiteralsLen == 256) {
|
||||
nLiteralsCost += 16;
|
||||
}
|
||||
if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
|
||||
nLiteralsCost += MODESWITCH_PENALTY;
|
||||
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
int m;
|
||||
|
||||
cost[i] = nLiteralsCost;
|
||||
pCompressor->best_match[i].length = 0;
|
||||
pCompressor->best_match[i].offset = 0;
|
||||
|
||||
repmatch_opt[i].best_slot_for_incoming = -1;
|
||||
repmatch_opt[i].incoming_offset = -1;
|
||||
repmatch_opt[i].expected_repmatch = 0;
|
||||
|
||||
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
|
||||
int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
|
||||
|
||||
nBestCost = nLiteralsCost;
|
||||
nBestMatchLen = 0;
|
||||
nBestMatchOffset = 0;
|
||||
nBestUpdatedSlot = -1;
|
||||
nBestUpdatedIndex = -1;
|
||||
nBestExpectedRepMatch = 0;
|
||||
|
||||
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
|
||||
int nCurCost;
|
||||
int nMatchLen = pMatch[m].length;
|
||||
|
||||
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
||||
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
||||
|
||||
int nCurIndex = prev_match[i + nMatchLen];
|
||||
|
||||
int nMatchOffsetSize = 0;
|
||||
int nCurExpectedRepMatch = 1;
|
||||
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
|
||||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
|
||||
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
|
||||
nCurExpectedRepMatch = 0;
|
||||
}
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
|
||||
nCurCost += cost[i + nMatchLen];
|
||||
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = nMatchLen;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
nBestUpdatedSlot = -1;
|
||||
nBestUpdatedIndex = -1;
|
||||
nBestExpectedRepMatch = nCurExpectedRepMatch;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nMatchLen = pMatch[m].length;
|
||||
int k, nMatchRunLen;
|
||||
|
||||
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
||||
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
||||
|
||||
nMatchRunLen = nMatchLen;
|
||||
if (nMatchRunLen > MATCH_RUN_LEN_V2)
|
||||
nMatchRunLen = MATCH_RUN_LEN_V2;
|
||||
|
||||
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
|
||||
int nCurCost;
|
||||
|
||||
int nCurIndex = prev_match[i + k];
|
||||
int nMatchOffsetSize = 0;
|
||||
int nCurExpectedRepMatch = 1;
|
||||
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
|
||||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
|
||||
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
|
||||
nCurExpectedRepMatch = 0;
|
||||
}
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
|
||||
nCurCost += cost[i + k];
|
||||
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
int nCurUpdatedSlot = -1;
|
||||
int nCurUpdatedIndex = -1;
|
||||
|
||||
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
|
||||
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
|
||||
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
|
||||
|
||||
if (nAltCost <= nCurCost) {
|
||||
nCurUpdatedSlot = r;
|
||||
nCurUpdatedIndex = nCurIndex;
|
||||
nCurCost = nAltCost;
|
||||
nCurExpectedRepMatch = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
nBestUpdatedSlot = nCurUpdatedSlot;
|
||||
nBestUpdatedIndex = nCurUpdatedIndex;
|
||||
nBestExpectedRepMatch = nCurExpectedRepMatch;
|
||||
}
|
||||
}
|
||||
|
||||
for (; k <= nMatchLen; k++) {
|
||||
int nCurCost;
|
||||
|
||||
int nCurIndex = prev_match[i + k];
|
||||
int nMatchOffsetSize = 0;
|
||||
int nCurExpectedRepMatch = 1;
|
||||
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
|
||||
pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
|
||||
nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
|
||||
nCurExpectedRepMatch = 0;
|
||||
}
|
||||
|
||||
nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
|
||||
nCurCost += cost[i + k];
|
||||
if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
int nCurUpdatedSlot = -1;
|
||||
int nCurUpdatedIndex = -1;
|
||||
|
||||
if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
|
||||
int r;
|
||||
|
||||
for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
|
||||
if (pCompressor->match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
|
||||
int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
|
||||
|
||||
if (nAltCost <= nCurCost) {
|
||||
nCurUpdatedSlot = r;
|
||||
nCurUpdatedIndex = nCurIndex;
|
||||
nCurCost = nAltCost;
|
||||
nCurExpectedRepMatch = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
nBestUpdatedSlot = nCurUpdatedSlot;
|
||||
nBestUpdatedIndex = nCurUpdatedIndex;
|
||||
nBestExpectedRepMatch = nCurExpectedRepMatch;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pSlotCost[m] = nBestCost;
|
||||
pMatch[m].length = nBestMatchLen;
|
||||
pMatch[m].offset = nBestMatchOffset; /* not necessary */
|
||||
|
||||
if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
|
||||
cost[i] = nBestCost;
|
||||
pCompressor->best_match[i].length = nBestMatchLen;
|
||||
pCompressor->best_match[i].offset = nBestMatchOffset;
|
||||
|
||||
repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
|
||||
|
||||
if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
|
||||
repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
|
||||
repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
for (; m < NMATCHES_PER_OFFSET; m++) {
|
||||
pSlotCost[m] = 0;
|
||||
}
|
||||
|
||||
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
|
||||
nLastLiteralsOffset = i;
|
||||
|
||||
prev_match[i] = nLastLiteralsOffset;
|
||||
}
|
||||
|
||||
int nIncomingOffset = -1;
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
|
||||
if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
|
||||
int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
|
||||
|
||||
pCompressor->best_match[i].length = pMatch->length;
|
||||
pCompressor->best_match[i].offset = pMatch->offset;
|
||||
cost[i] = *pSlotCost;
|
||||
|
||||
if (repmatch_opt[i].expected_repmatch == 2)
|
||||
repmatch_opt[i].expected_repmatch = 1;
|
||||
}
|
||||
else {
|
||||
if (repmatch_opt[i].expected_repmatch == 2)
|
||||
repmatch_opt[i].expected_repmatch = 0;
|
||||
}
|
||||
|
||||
nIncomingOffset = i;
|
||||
i += pCompressor->best_match[i].length;
|
||||
}
|
||||
else {
|
||||
i++;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
|
||||
* impacting the compression ratio
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
*
|
||||
* @return non-zero if the number of tokens was reduced, 0 if it wasn't
|
||||
*/
|
||||
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
int i;
|
||||
int nNumLiterals = 0;
|
||||
int nDidReduce = 0;
|
||||
int nPreviousMatchOffset = -1;
|
||||
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
|
||||
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pCompressor->best_match + i;
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
|
||||
int nMatchLen = pMatch->length;
|
||||
int nReduce = 0;
|
||||
int nCurrentMatchOffset = i;
|
||||
|
||||
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
|
||||
int nMatchOffset = pMatch->offset;
|
||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
|
||||
int nUndoRepMatchCost = (nPreviousMatchOffset < 0 || !repmatch_opt[nPreviousMatchOffset].expected_repmatch) ? 0 : 16;
|
||||
|
||||
if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2) {
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;
|
||||
|
||||
if (pCompressor->best_match[i + nMatchLen].offset != nMatchOffset) {
|
||||
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
|
||||
}
|
||||
|
||||
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nMatchLen))) {
|
||||
/* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
|
||||
* make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
|
||||
* at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
|
||||
* match command by literals, the output size will not increase and it will remove one command. */
|
||||
nReduce = 1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
int nCurIndex = i + nMatchLen;
|
||||
int nNextNumLiterals = 0;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(nEncodedMatchLen) - nUndoRepMatchCost;;
|
||||
|
||||
do {
|
||||
nCurIndex++;
|
||||
nNextNumLiterals++;
|
||||
} while (nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2);
|
||||
|
||||
if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
|
||||
pCompressor->best_match[nCurIndex].offset != nMatchOffset) {
|
||||
nCommandSize += (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)) /* match offset */;
|
||||
}
|
||||
|
||||
if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v2(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v2(nNextNumLiterals))) {
|
||||
/* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
|
||||
* more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
|
||||
nReduce = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nReduce) {
|
||||
int j;
|
||||
|
||||
for (j = 0; j < nMatchLen; j++) {
|
||||
pCompressor->best_match[i + j].length = 0;
|
||||
}
|
||||
nNumLiterals += nMatchLen;
|
||||
i += nMatchLen;
|
||||
|
||||
nDidReduce = 1;
|
||||
|
||||
if (nPreviousMatchOffset >= 0) {
|
||||
repmatch_opt[nPreviousMatchOffset].expected_repmatch = 0;
|
||||
nPreviousMatchOffset = -1;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
|
||||
pMatch->offset && pMatch->offset <= 32 && pCompressor->best_match[i + nMatchLen].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
|
||||
(nMatchLen + pCompressor->best_match[i + nMatchLen].length) <= MAX_VARLEN) {
|
||||
/* Join */
|
||||
|
||||
pMatch->length += pCompressor->best_match[i + nMatchLen].length;
|
||||
pCompressor->best_match[i + nMatchLen].offset = 0;
|
||||
pCompressor->best_match[i + nMatchLen].length = -1;
|
||||
continue;
|
||||
}
|
||||
|
||||
nNumLiterals = 0;
|
||||
i += nMatchLen;
|
||||
}
|
||||
|
||||
nPreviousMatchOffset = nCurrentMatchOffset;
|
||||
}
|
||||
else {
|
||||
nNumLiterals++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return nDidReduce;
|
||||
}
|
||||
|
||||
/**
|
||||
* Emit block of compressed data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
int i;
|
||||
int nNumLiterals = 0;
|
||||
int nInFirstLiteralOffset = 0;
|
||||
int nOutOffset = 0;
|
||||
int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
|
||||
int nRepMatchOffset = 0;
|
||||
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
|
||||
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pCompressor->best_match + i;
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
|
||||
int nMatchOffset = pMatch->offset;
|
||||
int nMatchLen = pMatch->length;
|
||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2;
|
||||
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
|
||||
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V2) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen;
|
||||
int nTokenOffsetMode;
|
||||
int nOffsetSize;
|
||||
|
||||
if (nMatchOffset == nRepMatchOffset) {
|
||||
nTokenOffsetMode = 0xe0;
|
||||
nOffsetSize = 0;
|
||||
}
|
||||
else {
|
||||
if (nMatchOffset <= 32) {
|
||||
nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
|
||||
nOffsetSize = 4;
|
||||
}
|
||||
else if (nMatchOffset <= 512) {
|
||||
nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
|
||||
nOffsetSize = 8;
|
||||
}
|
||||
else if (nMatchOffset <= (8192 + 512)) {
|
||||
nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
|
||||
nOffsetSize = 12;
|
||||
}
|
||||
else {
|
||||
nTokenOffsetMode = 0xc0;
|
||||
nOffsetSize = 16;
|
||||
}
|
||||
}
|
||||
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2(nEncodedMatchLen);
|
||||
|
||||
if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
|
||||
return -1;
|
||||
if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = nTokenOffsetMode | (nTokenLiteralsLen << 3) | nTokenMatchLen;
|
||||
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
if (nNumLiterals != 0) {
|
||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||
nOutOffset += nNumLiterals;
|
||||
nNumLiterals = 0;
|
||||
}
|
||||
|
||||
if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
|
||||
if (nOutOffset < 0) return -1;
|
||||
}
|
||||
else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
|
||||
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
||||
}
|
||||
else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
|
||||
if (nOutOffset < 0) return -1;
|
||||
pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
|
||||
}
|
||||
else if (nTokenOffsetMode == 0xc0) {
|
||||
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
||||
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
||||
}
|
||||
nRepMatchOffset = nMatchOffset;
|
||||
|
||||
nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
i += nMatchLen;
|
||||
|
||||
pCompressor->num_commands++;
|
||||
}
|
||||
else {
|
||||
if (nNumLiterals == 0)
|
||||
nInFirstLiteralOffset = i;
|
||||
nNumLiterals++;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
{
|
||||
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3);
|
||||
|
||||
if ((nOutOffset + ((nCommandSize + 7) >> 3)) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x47;
|
||||
else
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 3) | 0x00;
|
||||
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
if (nNumLiterals != 0) {
|
||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||
nOutOffset += nNumLiterals;
|
||||
nNumLiterals = 0;
|
||||
}
|
||||
|
||||
pCompressor->num_commands++;
|
||||
}
|
||||
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||
/* Emit EOD marker for raw block */
|
||||
|
||||
if (nOutOffset >= nMaxOutDataSize)
|
||||
return -1;
|
||||
pOutData[nOutOffset++] = 0; /* Match offset */
|
||||
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 15); /* Extended match length nibble */
|
||||
if (nOutOffset < 0) return -1;
|
||||
|
||||
if ((nOutOffset + 1) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = 232; /* EOD match length byte */
|
||||
}
|
||||
|
||||
if (nCurNibbleOffset != -1) {
|
||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, 0);
|
||||
if (nOutOffset < 0 || nCurNibbleOffset != -1)
|
||||
return -1;
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
|
||||
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
lzsa_optimize_matches_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
||||
|
||||
int nDidReduce;
|
||||
int nPasses = 0;
|
||||
do {
|
||||
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
||||
nPasses++;
|
||||
} while (nDidReduce && nPasses < 20);
|
||||
|
||||
return lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
|
||||
}
|
53
src/shrink_block_v2.h
Normal file
53
src/shrink_block_v2.h
Normal file
@ -0,0 +1,53 @@
|
||||
/*
|
||||
* shrink_block_v2.h - LZSA2 block compressor definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _SHRINK_BLOCK_V2_H
|
||||
#define _SHRINK_BLOCK_V2_H
|
||||
|
||||
/* Forward declarations */
|
||||
typedef struct _lzsa_compressor lzsa_compressor;
|
||||
|
||||
/**
|
||||
* Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA2 data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
|
||||
* @param nInDataSize number of input bytes to compress
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||
|
||||
#endif /* _SHRINK_BLOCK_V2_H */
|
194
src/shrink_context.c
Normal file
194
src/shrink_context.c
Normal file
@ -0,0 +1,194 @@
|
||||
/*
|
||||
* shrink_context.c - compression context implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "shrink_context.h"
|
||||
#include "shrink_block_v1.h"
|
||||
#include "shrink_block_v2.h"
|
||||
#include "format.h"
|
||||
#include "matchfinder.h"
|
||||
|
||||
/**
|
||||
* Initialize compression context
|
||||
*
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||
* @param nFlags compression flags
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags) {
|
||||
int nResult;
|
||||
int nMinMatchSizeForFormat = (nFormatVersion == 1) ? MIN_MATCH_SIZE_V1 : MIN_MATCH_SIZE_V2;
|
||||
|
||||
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
||||
pCompressor->intervals = NULL;
|
||||
pCompressor->pos_data = NULL;
|
||||
pCompressor->open_intervals = NULL;
|
||||
pCompressor->match = NULL;
|
||||
pCompressor->best_match = NULL;
|
||||
pCompressor->slot_cost = NULL;
|
||||
pCompressor->repmatch_opt = NULL;
|
||||
pCompressor->min_match_size = nMinMatchSize;
|
||||
if (pCompressor->min_match_size < nMinMatchSizeForFormat)
|
||||
pCompressor->min_match_size = nMinMatchSizeForFormat;
|
||||
else if (pCompressor->min_match_size > 5)
|
||||
pCompressor->min_match_size = 5;
|
||||
pCompressor->format_version = nFormatVersion;
|
||||
pCompressor->flags = nFlags;
|
||||
pCompressor->num_commands = 0;
|
||||
|
||||
if (!nResult) {
|
||||
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->intervals) {
|
||||
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->pos_data) {
|
||||
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->open_intervals) {
|
||||
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
|
||||
|
||||
if (pCompressor->match) {
|
||||
if (pCompressor->format_version == 2) {
|
||||
pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
|
||||
|
||||
if (pCompressor->best_match) {
|
||||
pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
|
||||
|
||||
if (pCompressor->slot_cost) {
|
||||
pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
|
||||
|
||||
if (pCompressor->repmatch_opt)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
lzsa_compressor_destroy(pCompressor);
|
||||
return 100;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up compression context and free up any associated resources
|
||||
*
|
||||
* @param pCompressor compression context to clean up
|
||||
*/
|
||||
void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
|
||||
divsufsort_destroy(&pCompressor->divsufsort_context);
|
||||
|
||||
if (pCompressor->repmatch_opt) {
|
||||
free(pCompressor->repmatch_opt);
|
||||
pCompressor->repmatch_opt = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->slot_cost) {
|
||||
free(pCompressor->slot_cost);
|
||||
pCompressor->slot_cost = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->best_match) {
|
||||
free(pCompressor->best_match);
|
||||
pCompressor->best_match = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->match) {
|
||||
free(pCompressor->match);
|
||||
pCompressor->match = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->open_intervals) {
|
||||
free(pCompressor->open_intervals);
|
||||
pCompressor->open_intervals = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->pos_data) {
|
||||
free(pCompressor->pos_data);
|
||||
pCompressor->pos_data = NULL;
|
||||
}
|
||||
|
||||
if (pCompressor->intervals) {
|
||||
free(pCompressor->intervals);
|
||||
pCompressor->intervals = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Compress one block of data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
|
||||
* @param nInDataSize number of input bytes to compress
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
|
||||
return -1;
|
||||
if (nPreviousBlockSize) {
|
||||
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
|
||||
}
|
||||
lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
||||
|
||||
if (pCompressor->format_version == 1) {
|
||||
return lzsa_optimize_and_write_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
|
||||
}
|
||||
else if (pCompressor->format_version == 2) {
|
||||
return lzsa_optimize_and_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nInDataSize, pOutData, nMaxOutDataSize);
|
||||
}
|
||||
else {
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the number of compression commands issued in compressed data blocks
|
||||
*
|
||||
* @return number of commands
|
||||
*/
|
||||
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor) {
|
||||
return pCompressor->num_commands;
|
||||
}
|
123
src/shrink_context.h
Normal file
123
src/shrink_context.h
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
* shrink_context.h - compression context definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _SHRINK_CONTEXT_H
|
||||
#define _SHRINK_CONTEXT_H
|
||||
|
||||
#include "divsufsort.h"
|
||||
|
||||
/* Split of a 32-bit word into an LCP field (the top LCP_BITS bits) and a position field (the remaining low
 * bits). LCP_MAX is half the range that LCP_BITS can express. */
#define LCP_BITS 15
#define LCP_MAX (1U << (LCP_BITS - 1))
#define LCP_SHIFT (32 - LCP_BITS)
#define LCP_MASK (((1U << LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U << LCP_SHIFT) - 1)

/* Number of match candidates stored per window position. Slots are addressed as
 * (position << MATCHES_PER_OFFSET_SHIFT) + slot, so the count is tied to the shift. */
#define MATCHES_PER_OFFSET_SHIFT 3
#define NMATCHES_PER_OFFSET (1 << MATCHES_PER_OFFSET_SHIFT)

/* Tuning constants for the block optimizers (not used in this header) */
#define LEAVE_ALONE_MATCH_SIZE 1000

#define LAST_MATCH_OFFSET 4
#define LAST_LITERALS 1

#define MODESWITCH_PENALTY 1
|
||||
|
||||
/**
 * One match
 *
 * NOTE(review): both fields are unsigned 16-bit values, so code that stores -1 into length actually
 * stores 65535 - confirm that this is what such callers expect.
 */
typedef struct _lzsa_match {
   unsigned short length;     /* match length in bytes; a value below the format's minimum match size marks a literal */
   unsigned short offset;     /* match offset, emitted in negated form by the block writer */
} lzsa_match;
|
||||
|
||||
/**
 * One rep-match slot (for LZSA2)
 *
 * Per-position bookkeeping that lets the LZSA2 optimizer favor a match whose offset repeats the offset of
 * the match emitted just before it.
 */
typedef struct _lzsa_repmatch_opt {
   int incoming_offset;             /* window position of the earlier match that counts on this position repeating its offset */
   short best_slot_for_incoming;    /* candidate slot to use at this position when arriving from incoming_offset; negative values are treated as unset */
   short expected_repmatch;         /* 0: none, 2: tentatively set while optimizing, later resolved to 1 or 0 */
} lzsa_repmatch_opt;
|
||||
|
||||
/** Compression context */
|
||||
typedef struct _lzsa_compressor {
|
||||
divsufsort_ctx_t divsufsort_context;
|
||||
unsigned int *intervals;
|
||||
unsigned int *pos_data;
|
||||
unsigned int *open_intervals;
|
||||
lzsa_match *match;
|
||||
lzsa_match *best_match;
|
||||
int *slot_cost;
|
||||
lzsa_repmatch_opt *repmatch_opt;
|
||||
int min_match_size;
|
||||
int format_version;
|
||||
int flags;
|
||||
int num_commands;
|
||||
} lzsa_compressor;
|
||||
|
||||
/**
|
||||
* Initialize compression context
|
||||
*
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param nFlags compression flags
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
|
||||
|
||||
/**
|
||||
* Clean up compression context and free up any associated resources
|
||||
*
|
||||
* @param pCompressor compression context to clean up
|
||||
*/
|
||||
void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
|
||||
|
||||
/**
|
||||
* Compress one block of data
|
||||
*
|
||||
* @param pCompressor compression context
|
||||
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
|
||||
* @param nInDataSize number of input bytes to compress
|
||||
* @param pOutData pointer to output buffer
|
||||
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
||||
*
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||
|
||||
/**
|
||||
* Get the number of compression commands issued in compressed data blocks
|
||||
*
* @param pCompressor compression context
*
|
||||
* @return number of commands
|
||||
*/
|
||||
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
|
||||
|
||||
#endif /* _SHRINK_CONTEXT_H */
|
178
src/shrink_inmem.c
Normal file
178
src/shrink_inmem.c
Normal file
@ -0,0 +1,178 @@
|
||||
/*
|
||||
* shrink_inmem.c - in-memory compression implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <limits.h>
#include <stdlib.h>
#include <string.h>
#include "shrink_inmem.h"
#include "shrink_context.h"
#include "frame.h"
#include "format.h"
#include "lib.h"
|
||||
|
||||
/**
|
||||
* Get maximum compressed size of input(source) data
|
||||
*
|
||||
* @param pFileData pointer to input(source) data
|
||||
* @param nFileSize input(source) size in bytes
|
||||
*
|
||||
* @return maximum compressed size
|
||||
*/
|
||||
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize) {
|
||||
return lzsa_get_header_size() + ((nInputSize + (BLOCK_SIZE - 1)) >> 16) * lzsa_get_frame_size() + nInputSize + lzsa_get_frame_size() /* footer */;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compress memory
|
||||
*
|
||||
* @param pInputData pointer to input(source) data to compress
|
||||
* @param pOutBuffer buffer for compressed data
|
||||
* @param nInputSize input(source) size in bytes
|
||||
* @param nMaxOutBufferSize maximum capacity of compression buffer
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
*
|
||||
* @return actual compressed size, or -1 for error
|
||||
*/
|
||||
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion) {
|
||||
lzsa_compressor compressor;
|
||||
size_t nOriginalSize = 0;
|
||||
size_t nCompressedSize = 0L;
|
||||
int nResult;
|
||||
int nError = 0;
|
||||
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
|
||||
if (nResult != 0) {
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
int nHeaderSize = lzsa_encode_header(pOutBuffer, (int)nMaxOutBufferSize, nFormatVersion);
|
||||
if (nHeaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
nCompressedSize += nHeaderSize;
|
||||
}
|
||||
}
|
||||
|
||||
int nPreviousBlockSize = 0;
|
||||
int nNumBlocks = 0;
|
||||
|
||||
while (nOriginalSize < nInputSize && !nError) {
|
||||
int nInDataSize;
|
||||
|
||||
nInDataSize = (int)(nInputSize - nOriginalSize);
|
||||
if (nInDataSize > BLOCK_SIZE)
|
||||
nInDataSize = BLOCK_SIZE;
|
||||
|
||||
if (nInDataSize > 0) {
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
|
||||
nError = LZSA_ERROR_RAW_TOOLARGE;
|
||||
break;
|
||||
}
|
||||
|
||||
int nOutDataSize;
|
||||
int nOutDataEnd = (int)(nMaxOutBufferSize - (lzsa_get_frame_size() + nCompressedSize + lzsa_get_frame_size() /* footer */));
|
||||
|
||||
if (nOutDataEnd > BLOCK_SIZE)
|
||||
nOutDataEnd = BLOCK_SIZE;
|
||||
|
||||
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInputData + nOriginalSize - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutBuffer + lzsa_get_frame_size() + nCompressedSize, nOutDataEnd);
|
||||
if (nOutDataSize >= 0) {
|
||||
/* Write compressed block */
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
int nBlockheaderSize = lzsa_encode_compressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nOutDataSize);
|
||||
if (nBlockheaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
nCompressedSize += nBlockheaderSize;
|
||||
|
||||
nOriginalSize += nInDataSize;
|
||||
nCompressedSize += nOutDataSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Write uncompressible, literal block */
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
|
||||
nError = LZSA_ERROR_RAW_UNCOMPRESSED;
|
||||
break;
|
||||
}
|
||||
|
||||
int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize), nInDataSize);
|
||||
if (nBlockheaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
|
||||
nError = LZSA_ERROR_DST;
|
||||
else {
|
||||
memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
|
||||
|
||||
nOriginalSize += nInDataSize;
|
||||
nCompressedSize += nBlockheaderSize + nInDataSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nPreviousBlockSize = nInDataSize;
|
||||
nNumBlocks++;
|
||||
}
|
||||
}
|
||||
|
||||
if (!nError) {
|
||||
int nFooterSize;
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
|
||||
nFooterSize = 0;
|
||||
}
|
||||
else {
|
||||
nFooterSize = lzsa_encode_footer_frame(pOutBuffer + nCompressedSize, (int)(nMaxOutBufferSize - nCompressedSize));
|
||||
if (nFooterSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
}
|
||||
|
||||
nCompressedSize += nFooterSize;
|
||||
}
|
||||
|
||||
lzsa_compressor_destroy(&compressor);
|
||||
|
||||
if (nError) {
|
||||
return -1;
|
||||
}
|
||||
else {
|
||||
return nCompressedSize;
|
||||
}
|
||||
}
|
||||
|
64
src/shrink_inmem.h
Normal file
64
src/shrink_inmem.h
Normal file
@ -0,0 +1,64 @@
|
||||
/*
|
||||
* shrink_inmem.h - in-memory compression definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _SHRINK_INMEM_H
|
||||
#define _SHRINK_INMEM_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
/**
|
||||
* Get maximum compressed size of input(source) data
|
||||
*
|
||||
* @param nInputSize input(source) size in bytes
|
||||
*
|
||||
* @return maximum compressed size
|
||||
*/
|
||||
size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
|
||||
|
||||
/**
|
||||
* Compress memory
|
||||
*
|
||||
* @param pInputData pointer to input(source) data to compress
|
||||
* @param pOutBuffer buffer for compressed data
|
||||
* @param nInputSize input(source) size in bytes
|
||||
* @param nMaxOutBufferSize maximum capacity of compression buffer
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
*
|
||||
* @return actual compressed size, or (size_t)-1 for error
|
||||
*/
|
||||
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
|
||||
|
||||
#endif /* _SHRINK_INMEM_H */
|
285
src/shrink_streaming.c
Normal file
285
src/shrink_streaming.c
Normal file
@ -0,0 +1,285 @@
|
||||
/*
|
||||
* shrink_streaming.c - streaming compression implementation
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "shrink_streaming.h"
|
||||
#include "format.h"
|
||||
#include "frame.h"
|
||||
#include "lib.h"
|
||||
|
||||
/*-------------- File API -------------- */
|
||||
|
||||
/**
|
||||
* Compress file
|
||||
*
|
||||
* @param pszInFilename name of input(source) file to compress
|
||||
* @param pszOutFilename name of output(compressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
|
||||
lzsa_stream_t inStream, outStream;
|
||||
void *pDictionaryData = NULL;
|
||||
int nDictionaryDataSize = 0;
|
||||
lzsa_status_t nStatus;
|
||||
|
||||
if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
|
||||
return LZSA_ERROR_SRC;
|
||||
}
|
||||
|
||||
if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) {
|
||||
inStream.close(&inStream);
|
||||
return LZSA_ERROR_DST;
|
||||
}
|
||||
|
||||
nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
|
||||
|
||||
if (nStatus) {
|
||||
outStream.close(&outStream);
|
||||
inStream.close(&inStream);
|
||||
return nStatus;
|
||||
}
|
||||
|
||||
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount);
|
||||
|
||||
lzsa_dictionary_free(&pDictionaryData);
|
||||
outStream.close(&outStream);
|
||||
inStream.close(&inStream);
|
||||
return nStatus;
|
||||
}
|
||||
|
||||
/*-------------- Streaming API -------------- */
|
||||
|
||||
/**
|
||||
* Compress stream
|
||||
*
|
||||
* @param pInStream input(source) stream to compress
|
||||
* @param pOutStream output(compressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
|
||||
unsigned char *pInData, *pOutData;
|
||||
lzsa_compressor compressor;
|
||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||
int nResult;
|
||||
unsigned char cFrameData[16];
|
||||
int nError = 0;
|
||||
|
||||
pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
|
||||
if (!pInData) {
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
memset(pInData, 0, BLOCK_SIZE * 2);
|
||||
|
||||
pOutData = (unsigned char*)malloc(BLOCK_SIZE);
|
||||
if (!pOutData) {
|
||||
free(pInData);
|
||||
pInData = NULL;
|
||||
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
memset(pOutData, 0, BLOCK_SIZE);
|
||||
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
|
||||
if (nResult != 0) {
|
||||
free(pOutData);
|
||||
pOutData = NULL;
|
||||
|
||||
free(pInData);
|
||||
pInData = NULL;
|
||||
|
||||
return LZSA_ERROR_MEMORY;
|
||||
}
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
|
||||
if (nHeaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize)
|
||||
nError = LZSA_ERROR_DST;
|
||||
nCompressedSize += (long long)nHeaderSize;
|
||||
}
|
||||
}
|
||||
|
||||
int nPreviousBlockSize = 0;
|
||||
int nNumBlocks = 0;
|
||||
|
||||
while (!pInStream->eof(pInStream) && !nError) {
|
||||
int nInDataSize;
|
||||
|
||||
if (nPreviousBlockSize) {
|
||||
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
|
||||
}
|
||||
else if (nDictionaryDataSize && pDictionaryData) {
|
||||
nPreviousBlockSize = nDictionaryDataSize;
|
||||
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
|
||||
}
|
||||
|
||||
nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
|
||||
if (nInDataSize > 0) {
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
|
||||
nError = LZSA_ERROR_RAW_TOOLARGE;
|
||||
break;
|
||||
}
|
||||
nDictionaryDataSize = 0;
|
||||
|
||||
int nOutDataSize;
|
||||
|
||||
nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
|
||||
if (nOutDataSize >= 0) {
|
||||
/* Write compressed block */
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
|
||||
int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
|
||||
if (nBlockheaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
nCompressedSize += (long long)nBlockheaderSize;
|
||||
if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
|
||||
nError = LZSA_ERROR_DST;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!nError) {
|
||||
if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
|
||||
nError = LZSA_ERROR_DST;
|
||||
}
|
||||
else {
|
||||
nOriginalSize += (long long)nInDataSize;
|
||||
nCompressedSize += (long long)nOutDataSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
/* Write uncompressible, literal block */
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
|
||||
nError = LZSA_ERROR_RAW_UNCOMPRESSED;
|
||||
break;
|
||||
}
|
||||
|
||||
int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
|
||||
if (nBlockheaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
|
||||
nError = LZSA_ERROR_DST;
|
||||
}
|
||||
else {
|
||||
if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
|
||||
nError = LZSA_ERROR_DST;
|
||||
}
|
||||
else {
|
||||
nOriginalSize += (long long)nInDataSize;
|
||||
nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
nPreviousBlockSize = nInDataSize;
|
||||
nNumBlocks++;
|
||||
}
|
||||
|
||||
if (!nError && !pInStream->eof(pInStream)) {
|
||||
if (progress)
|
||||
progress(nOriginalSize, nCompressedSize);
|
||||
}
|
||||
}
|
||||
|
||||
if (!nError) {
|
||||
int nFooterSize;
|
||||
|
||||
if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
|
||||
nFooterSize = 0;
|
||||
}
|
||||
else {
|
||||
nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
|
||||
if (nFooterSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
}
|
||||
|
||||
if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize)
|
||||
nError = LZSA_ERROR_DST;
|
||||
nCompressedSize += (long long)nFooterSize;
|
||||
}
|
||||
|
||||
if (progress)
|
||||
progress(nOriginalSize, nCompressedSize);
|
||||
|
||||
int nCommandCount = lzsa_compressor_get_command_count(&compressor);
|
||||
lzsa_compressor_destroy(&compressor);
|
||||
|
||||
free(pOutData);
|
||||
pOutData = NULL;
|
||||
|
||||
free(pInData);
|
||||
pInData = NULL;
|
||||
|
||||
if (nError) {
|
||||
return nError;
|
||||
}
|
||||
else {
|
||||
if (pOriginalSize)
|
||||
*pOriginalSize = nOriginalSize;
|
||||
if (pCompressedSize)
|
||||
*pCompressedSize = nCompressedSize;
|
||||
if (pCommandCount)
|
||||
*pCommandCount = nCommandCount;
|
||||
return LZSA_OK;
|
||||
}
|
||||
}
|
86
src/shrink_streaming.h
Normal file
86
src/shrink_streaming.h
Normal file
@ -0,0 +1,86 @@
|
||||
/*
|
||||
* shrink_streaming.h - streaming compression definitions
|
||||
*
|
||||
* Copyright (C) 2019 Emmanuel Marty
|
||||
*
|
||||
* This software is provided 'as-is', without any express or implied
|
||||
* warranty. In no event will the authors be held liable for any damages
|
||||
* arising from the use of this software.
|
||||
*
|
||||
* Permission is granted to anyone to use this software for any purpose,
|
||||
* including commercial applications, and to alter it and redistribute it
|
||||
* freely, subject to the following restrictions:
|
||||
*
|
||||
* 1. The origin of this software must not be misrepresented; you must not
|
||||
* claim that you wrote the original software. If you use this software
|
||||
* in a product, an acknowledgment in the product documentation would be
|
||||
* appreciated but is not required.
|
||||
* 2. Altered source versions must be plainly marked as such, and must not be
|
||||
* misrepresented as being the original software.
|
||||
* 3. This notice may not be removed or altered from any source distribution.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
||||
*
|
||||
*/
|
||||
|
||||
#ifndef _SHRINK_STREAMING_H
|
||||
#define _SHRINK_STREAMING_H
|
||||
|
||||
#include "stream.h"
|
||||
|
||||
/* Forward declaration */
|
||||
typedef enum _lzsa_status_t lzsa_status_t;
|
||||
|
||||
/*-------------- File API -------------- */
|
||||
|
||||
/**
|
||||
* Compress file
|
||||
*
|
||||
* @param pszInFilename name of input(source) file to compress
|
||||
* @param pszOutFilename name of output(compressed) file to generate
|
||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
||||
|
||||
/*-------------- Streaming API -------------- */
|
||||
|
||||
/**
|
||||
* Compress stream
|
||||
*
|
||||
* @param pInStream input(source) stream to compress
|
||||
* @param pOutStream output(compressed) stream to write to
|
||||
* @param pDictionaryData dictionary contents, or NULL for none
|
||||
* @param nDictionaryDataSize size of dictionary contents, or 0
|
||||
* @param nFlags compression flags (LZSA_FLAG_xxx)
|
||||
* @param nMinMatchSize minimum match size
|
||||
* @param nFormatVersion version of format to use (1-2)
|
||||
* @param progress progress function, called after compressing each block, or NULL for none
|
||||
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
|
||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||
*
|
||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||
*/
|
||||
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
||||
|
||||
#endif /* _SHRINK_STREAMING_H */
|
Loading…
x
Reference in New Issue
Block a user