Move top-level streaming compression code to library

This commit is contained in:
emmanuel-marty 2019-05-13 22:22:53 +02:00
parent 0f4ffa7166
commit 277b5b1025
12 changed files with 1018 additions and 686 deletions

View File

@ -11,8 +11,9 @@ $(OBJDIR)/%.o: src/../%.c
APP := lzsa
OBJS := $(OBJDIR)/src/lzsa.o
OBJS += $(OBJDIR)/src/frame.o
OBJS += $(OBJDIR)/src/lib.o
OBJS += $(OBJDIR)/src/stream.o
OBJS += $(OBJDIR)/src/frame.o
OBJS += $(OBJDIR)/src/matchfinder.o
OBJS += $(OBJDIR)/src/shrink_v1.o
OBJS += $(OBJDIR)/src/shrink_v2.o

View File

@ -188,6 +188,7 @@
<ClInclude Include="..\src\matchfinder.h" />
<ClInclude Include="..\src\shrink_v1.h" />
<ClInclude Include="..\src\shrink_v2.h" />
<ClInclude Include="..\src\stream.h" />
<ClInclude Include="pch.h" />
</ItemGroup>
<ItemGroup>
@ -203,6 +204,7 @@
<ClCompile Include="..\src\matchfinder.c" />
<ClCompile Include="..\src\shrink_v1.c" />
<ClCompile Include="..\src\shrink_v2.c" />
<ClCompile Include="..\src\stream.c" />
</ItemGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
<ImportGroup Label="ExtensionTargets">

View File

@ -60,6 +60,9 @@
<ClInclude Include="..\src\lib.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
<ClInclude Include="..\src\stream.h">
<Filter>Fichiers sources</Filter>
</ClInclude>
</ItemGroup>
<ItemGroup>
<ClCompile Include="..\src\libdivsufsort\lib\utils.c">
@ -98,5 +101,8 @@
<ClCompile Include="..\src\lzsa.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
<ClCompile Include="..\src\stream.c">
<Filter>Fichiers sources</Filter>
</ClCompile>
</ItemGroup>
</Project>

View File

@ -179,7 +179,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **p
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset;

View File

@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V1_H */

View File

@ -184,7 +184,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
const unsigned char *pInBlockEnd = pInBlock + nBlockSize;
const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8;
unsigned char *pCurOutData = pOutData + nOutDataOffset;

View File

@ -44,6 +44,6 @@
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _EXPAND_V2_H */

524
src/lib.c
View File

@ -40,6 +40,522 @@
#include "expand_v1.h"
#include "expand_v2.h"
#include "format.h"
#include "frame.h"
#define BLOCK_SIZE 65536
/*-------------- Top level API -------------- */
/**
 * Compress one file into another, optionally using a dictionary file
 *
 * Opens the input and output files as streams, loads the dictionary (if any) and
 * delegates the actual work to lsza_compress_stream(). Both streams are closed
 * before returning, whatever the outcome.
 *
 * @param pszInFilename name of input(source) file to compress
 * @param pszOutFilename name of output(compressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, forwarded to lsza_compress_stream(), or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
 *
 * @return LZSA_OK for success, LZSA_ERROR_SRC if the input file cannot be opened, LZSA_ERROR_DST if the
 *         output file cannot be opened, or another error value from lzsa_status_t
 */
lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                 void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
   lzsa_stream_t srcStream, dstStream;
   void *pDictionary = NULL;
   int nDictionarySize = 0;
   lzsa_status_t nResult;

   if (lzsa_filestream_open(&srcStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&dstStream, pszOutFilename, "wb") < 0) {
      srcStream.close(&srcStream);
      return LZSA_ERROR_DST;
   }

   /* Only compress when the dictionary step succeeded; a failed load leaves nothing to free */
   nResult = lzsa_dictionary_load(pszDictionaryFilename, &pDictionary, &nDictionarySize);
   if (!nResult) {
      nResult = lsza_compress_stream(&srcStream, &dstStream, pDictionary, nDictionarySize, nFlags, nMinMatchSize, nFormatVersion,
                                     progress, pOriginalSize, pCompressedSize, pCommandCount);
      lzsa_dictionary_free(&pDictionary);
   }

   /* Single exit: close the output first, then the input */
   dstStream.close(&dstStream);
   srcStream.close(&srcStream);
   return nResult;
}
/**
 * Decompress one file into another, optionally using a dictionary file
 *
 * Opens the input and output files as streams, loads the dictionary (if any) and
 * delegates the actual work to lzsa_decompress_stream(). Both streams are closed
 * before returning, whatever the outcome.
 *
 * @param pszInFilename name of input(compressed) file to decompress
 * @param pszOutFilename name of output(decompressed) file to generate
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size, forwarded to lzsa_decompress_stream()
 * @param pCompressedSize pointer to returned input(compressed) size, forwarded to lzsa_decompress_stream()
 *
 * @return LZSA_OK for success, LZSA_ERROR_SRC if the input file cannot be opened, LZSA_ERROR_DST if the
 *         output file cannot be opened, or another error value from lzsa_status_t
 */
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
                                   long long *pOriginalSize, long long *pCompressedSize) {
   lzsa_stream_t srcStream, dstStream;
   void *pDictionary = NULL;
   int nDictionarySize = 0;
   lzsa_status_t nResult;

   if (lzsa_filestream_open(&srcStream, pszInFilename, "rb") < 0)
      return LZSA_ERROR_SRC;

   if (lzsa_filestream_open(&dstStream, pszOutFilename, "wb") < 0) {
      srcStream.close(&srcStream);
      return LZSA_ERROR_DST;
   }

   /* Only decompress when the dictionary step succeeded; a failed load leaves nothing to free */
   nResult = lzsa_dictionary_load(pszDictionaryFilename, &pDictionary, &nDictionarySize);
   if (!nResult) {
      nResult = lzsa_decompress_stream(&srcStream, &dstStream, pDictionary, nDictionarySize, nFlags, nFormatVersion,
                                       pOriginalSize, pCompressedSize);
      lzsa_dictionary_free(&pDictionary);
   }

   /* Single exit: close the output first, then the input */
   dstStream.close(&dstStream);
   srcStream.close(&srcStream);
   return nResult;
}
/*-------------- Streaming API -------------- */
/**
 * Load dictionary contents
 *
 * At most BLOCK_SIZE bytes are loaded; when the file is larger, its last BLOCK_SIZE
 * bytes are used. On success with a non-NULL filename, the returned buffer is
 * heap-allocated and must be released with lzsa_dictionary_free(). The output
 * parameters are only written on success.
 *
 * @param pszDictionaryFilename name of dictionary file, or NULL for none
 * @param ppDictionaryData pointer to returned dictionary contents, set to NULL when no dictionary is loaded
 * @param pDictionaryDataSize pointer to returned size of dictionary contents, set to 0 when no dictionary is loaded
 *
 * @return LZSA_OK for success, LZSA_ERROR_MEMORY if the buffer cannot be allocated, or
 *         LZSA_ERROR_DICTIONARY if the file cannot be opened or read
 */
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) {
   unsigned char *pDictionaryData = NULL;
   int nDictionaryDataSize = 0;

   if (pszDictionaryFilename) {
      pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE);
      if (!pDictionaryData) {
         return LZSA_ERROR_MEMORY;
      }

      FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
      if (!pDictionaryFile) {
         free(pDictionaryData);
         pDictionaryData = NULL;
         return LZSA_ERROR_DICTIONARY;
      }

      /* Find the file size so that only the tail of an oversized dictionary is read */
      fseek(pDictionaryFile, 0, SEEK_END);
#ifdef _WIN32
      __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
#else
      off_t nDictionaryFileSize = ftello(pDictionaryFile);
#endif
      if (nDictionaryFileSize > BLOCK_SIZE) {
         /* Use the last BLOCK_SIZE bytes of the dictionary */
         fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
      }
      else {
         fseek(pDictionaryFile, 0, SEEK_SET);
      }

      /* fread() returns an unsigned count and never a negative value, so a failed
       * read has to be detected through the stream's error indicator */
      size_t nReadBytes = fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile);
      int nReadFailed = ferror(pDictionaryFile);

      fclose(pDictionaryFile);
      pDictionaryFile = NULL;

      if (nReadFailed) {
         free(pDictionaryData);
         pDictionaryData = NULL;
         return LZSA_ERROR_DICTIONARY;
      }

      nDictionaryDataSize = (int)nReadBytes;
   }

   *ppDictionaryData = pDictionaryData;
   *pDictionaryDataSize = nDictionaryDataSize;
   return LZSA_OK;
}
/**
 * Free dictionary contents
 *
 * Releases the buffer and resets the caller's pointer to NULL, so that calling
 * this function again on the same pointer is harmless.
 *
 * @param ppDictionaryData pointer to pointer to dictionary contents; may be NULL, or point to a NULL pointer
 */
void lzsa_dictionary_free(void **ppDictionaryData) {
   if (ppDictionaryData && *ppDictionaryData) {
      free(*ppDictionaryData);
      /* Clear the caller's pointer (not the local parameter) to avoid leaving it dangling */
      *ppDictionaryData = NULL;
   }
}
/**
 * Compress stream
 *
 * Reads the input in blocks of up to BLOCK_SIZE bytes, compresses each block with the
 * previous block (or, for the first block, the dictionary) as history, and writes the
 * result to the output stream. Unless LZSA_FLAG_RAW_BLOCK is set, a stream header, one
 * frame per block and a footer frame are emitted as well. A block that cannot be
 * compressed is stored uncompressed, which raw block mode does not allow.
 *
 * @param pInStream input(source) stream to compress
 * @param pOutStream output(compressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0. When larger than BLOCK_SIZE, only the last BLOCK_SIZE bytes are used
 * @param nFlags compression flags (LZSA_FLAG_xxx)
 * @param nMinMatchSize minimum match size
 * @param nFormatVersion version of format to use (1-2)
 * @param progress progress function, called between blocks and once more before returning (even on error), or NULL for none
 * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful; may be NULL
 * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful; may be NULL
 * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful; may be NULL
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t
 */
lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
                                   const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
                                   void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
   unsigned char *pInData, *pOutData;
   lsza_compressor compressor;
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   int nResult;
   unsigned char cFrameData[16];
   int nError = 0;

   /* The dictionary is copied into the BLOCK_SIZE bytes that precede the current block,
    * so a negative or oversized length would make memcpy() write outside the window.
    * Ignore invalid sizes and keep only the tail of an oversized dictionary. */
   if (!pDictionaryData || nDictionaryDataSize <= 0) {
      pDictionaryData = NULL;
      nDictionaryDataSize = 0;
   }
   else if (nDictionaryDataSize > BLOCK_SIZE) {
      pDictionaryData = (const unsigned char *)pDictionaryData + (nDictionaryDataSize - BLOCK_SIZE);
      nDictionaryDataSize = BLOCK_SIZE;
   }

   /* Input window: [previous block or dictionary | current block] */
   pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pInData) {
      return LZSA_ERROR_MEMORY;
   }
   memset(pInData, 0, BLOCK_SIZE * 2);

   pOutData = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pOutData) {
      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }
   memset(pOutData, 0, BLOCK_SIZE);

   nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
   if (nResult != 0) {
      free(pOutData);
      pOutData = NULL;

      free(pInData);
      pInData = NULL;

      return LZSA_ERROR_MEMORY;
   }

   /* Raw blocks carry no stream header */
   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
      if (nHeaderSize < 0)
         nError = LZSA_ERROR_COMPRESSION;
      else {
         if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nHeaderSize;
      }
   }

   int nPreviousBlockSize = 0;
   int nNumBlocks = 0;

   while (!pInStream->eof(pInStream) && !nError) {
      int nInDataSize;

      if (nPreviousBlockSize) {
         /* Slide the previous block so that it ends right where the new block starts */
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
      }
      else if (nDictionaryDataSize && pDictionaryData) {
         /* No previous block yet: use the dictionary as history */
         nPreviousBlockSize = nDictionaryDataSize;
         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize);
      }

      nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE);
      if (nInDataSize > 0) {
         if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) {
            /* A raw block can only hold a single block of input */
            nError = LZSA_ERROR_RAW_TOOLARGE;
            break;
         }
         nDictionaryDataSize = 0;

         int nOutDataSize;

         nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
         if (nOutDataSize >= 0) {
            /* Write compressed block */

            if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
               int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
               if (nBlockheaderSize < 0)
                  nError = LZSA_ERROR_COMPRESSION;
               else {
                  nCompressedSize += (long long)nBlockheaderSize;
                  if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                     nError = LZSA_ERROR_DST;
                  }
               }
            }

            if (!nError) {
               if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  nOriginalSize += (long long)nInDataSize;
                  nCompressedSize += (long long)nOutDataSize;
               }
            }
         }
         else {
            /* Write uncompressible, literal block */

            if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) {
               nError = LZSA_ERROR_RAW_UNCOMPRESSED;
               break;
            }

            int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
            if (nBlockheaderSize < 0)
               nError = LZSA_ERROR_COMPRESSION;
            else {
               if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) {
                  nError = LZSA_ERROR_DST;
               }
               else {
                  if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) {
                     nError = LZSA_ERROR_DST;
                  }
                  else {
                     nOriginalSize += (long long)nInDataSize;
                     nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
                  }
               }
            }
         }

         nPreviousBlockSize = nInDataSize;
         nNumBlocks++;
      }

      /* Intermediate progress report; the final one is issued after the loop */
      if (!nError && !pInStream->eof(pInStream)) {
         if (progress)
            progress(nOriginalSize, nCompressedSize);
      }
   }

   if (!nError) {
      int nFooterSize = 0;

      /* Raw blocks carry no footer frame */
      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
         nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
         if (nFooterSize < 0)
            nError = LZSA_ERROR_COMPRESSION;
      }

      /* Never hand a negative size to write() when encoding the footer failed */
      if (!nError) {
         if (pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize)
            nError = LZSA_ERROR_DST;
         nCompressedSize += (long long)nFooterSize;
      }
   }

   if (progress)
      progress(nOriginalSize, nCompressedSize);

   int nCommandCount = lzsa_compressor_get_command_count(&compressor);
   lzsa_compressor_destroy(&compressor);

   free(pOutData);
   pOutData = NULL;

   free(pInData);
   pInData = NULL;

   if (nError) {
      return nError;
   }
   else {
      if (pOriginalSize)
         *pOriginalSize = nOriginalSize;
      if (pCompressedSize)
         *pCompressedSize = nCompressedSize;
      if (pCommandCount)
         *pCommandCount = nCommandCount;
      return LZSA_OK;
   }
}
/**
 * Decompress stream
 *
 * Unless LZSA_FLAG_RAW_BLOCK is set, reads the stream header (which overrides
 * nFormatVersion) and then one frame per block until a zero-sized frame or the end
 * of the input is reached. Each block is expanded with the previously decompressed
 * block (or, for the first block, the dictionary) as history, and written to the
 * output stream.
 *
 * @param pInStream input(compressed) stream to decompress
 * @param pOutStream output(decompressed) stream to write to
 * @param pDictionaryData dictionary contents, or NULL for none
 * @param nDictionaryDataSize size of dictionary contents, or 0. When larger than BLOCK_SIZE, only the last BLOCK_SIZE bytes are used
 * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
 * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
 * @param pOriginalSize pointer to returned output(decompressed) size; may be NULL. Written once the working buffers have been allocated, even if decompression then fails
 * @param pCompressedSize pointer to returned input(compressed) size; may be NULL. Written once the working buffers have been allocated, even if decompression then fails
 *
 * @return LZSA_OK for success, or an error value from lzsa_status_t (LZSA_ERROR_SRC when the
 *         header, a frame or a block's payload cannot be read in full)
 */
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
                                     long long *pOriginalSize, long long *pCompressedSize) {
   long long nOriginalSize = 0LL, nCompressedSize = 0LL;
   unsigned char cFrameData[16];
   unsigned char *pInBlock;
   unsigned char *pOutData;

   /* The dictionary is copied into the BLOCK_SIZE bytes that precede the current block,
    * so a negative or oversized length would make memcpy() write outside the window.
    * Ignore invalid sizes and keep only the tail of an oversized dictionary. */
   if (!pDictionaryData || nDictionaryDataSize <= 0) {
      pDictionaryData = NULL;
      nDictionaryDataSize = 0;
   }
   else if (nDictionaryDataSize > BLOCK_SIZE) {
      pDictionaryData = (const unsigned char *)pDictionaryData + (nDictionaryDataSize - BLOCK_SIZE);
      nDictionaryDataSize = BLOCK_SIZE;
   }

   /* Raw blocks carry no stream header */
   if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
      const int nHeaderSize = lzsa_get_header_size();

      memset(cFrameData, 0, 16);
      if (pInStream->read(pInStream, cFrameData, nHeaderSize) != nHeaderSize) {
         return LZSA_ERROR_SRC;
      }

      if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
         return LZSA_ERROR_FORMAT;
      }

      nCompressedSize += (long long)nHeaderSize;
   }

   pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
   if (!pInBlock) {
      return LZSA_ERROR_MEMORY;
   }

   /* Output window: [previous block or dictionary | current block] */
   pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
   if (!pOutData) {
      free(pInBlock);
      pInBlock = NULL;

      return LZSA_ERROR_MEMORY;
   }

   int nDecompressionError = 0;
   int nPrevDecompressedSize = 0;
   int nNumBlocks = 0;

   while (!pInStream->eof(pInStream) && !nDecompressionError) {
      unsigned int nBlockSize = 0;
      int nIsUncompressed = 0;

      if (nPrevDecompressedSize != 0) {
         /* Slide the previous block so that it ends right where the new block starts */
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
      }
      else if (nDictionaryDataSize && pDictionaryData) {
         /* No previous block yet: use the dictionary as history */
         nPrevDecompressedSize = nDictionaryDataSize;
         memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize);
      }

      if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) {
         const int nFrameSize = lzsa_get_frame_size();

         memset(cFrameData, 0, 16);
         if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) {
            if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
               nDecompressionError = LZSA_ERROR_FORMAT;
               nBlockSize = 0;
            }

            nCompressedSize += (long long)nFrameSize;
         }
         else {
            nDecompressionError = LZSA_ERROR_SRC;
            nBlockSize = 0;
         }
      }
      else {
         /* A raw block has no frames: read a single block of up to BLOCK_SIZE bytes */
         if (!nNumBlocks)
            nBlockSize = BLOCK_SIZE;
         else
            nBlockSize = 0;
      }

      if (nBlockSize != 0) {
         int nDecompressedSize = 0;

         if ((int)nBlockSize > BLOCK_SIZE) {
            nDecompressionError = LZSA_ERROR_FORMAT;
            break;
         }
         size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize);
         if (nFlags & LZSA_FLAG_RAW_BLOCK) {
            /* The last 4 bytes of a raw block are not passed to the decompressor
             * NOTE(review): presumably a trailing end-of-data marker - confirm against the format */
            if (nReadBytes > 4)
               nReadBytes -= 4;
            else
               nReadBytes = 0;
            nBlockSize = (unsigned int)nReadBytes;
         }

         if (nReadBytes == nBlockSize) {
            nCompressedSize += (long long)nReadBytes;

            if (nIsUncompressed) {
               memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
               nDecompressedSize = nBlockSize;
            }
            else {
               nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
               if (nDecompressedSize < 0) {
                  nDecompressionError = LZSA_ERROR_DECOMPRESSION;
                  break;
               }
            }

            if (nDecompressedSize != 0) {
               nOriginalSize += (long long)nDecompressedSize;

               if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize)
                  nDecompressionError = LZSA_ERROR_DST;
               nPrevDecompressedSize = nDecompressedSize;
               nDecompressedSize = 0;
            }
         }
         else {
            /* The frame announced more payload than the input provides: the stream is
             * truncated, so report it instead of returning success */
            nDecompressionError = LZSA_ERROR_SRC;
            break;
         }

         nNumBlocks++;
      }
      else {
         /* Zero-sized frame (or nothing left to read in raw mode): end of data */
         break;
      }
   }

   free(pOutData);
   pOutData = NULL;

   free(pInBlock);
   pInBlock = NULL;

   if (pOriginalSize)
      *pOriginalSize = nOriginalSize;
   if (pCompressedSize)
      *pCompressedSize = nCompressedSize;
   return nDecompressionError;
}
/*-------------- Block compression API --------------*/
/**
* Initialize compression context
@ -168,7 +684,7 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
return -1;
if (nPreviousBlockSize) {
@ -207,11 +723,11 @@ int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) {
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
if (nFormatVersion == 1)
return lzsa_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else if (nFormatVersion == 2)
return lzsa_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else
return -1;
}

122
src/lib.h
View File

@ -34,11 +34,128 @@
#define _LIB_H
#include "divsufsort.h"
#include "stream.h"
/** High level status for compression and decompression, returned by the top level and streaming APIs */
typedef enum {
LZSA_OK = 0, /**< Success */
LZSA_ERROR_SRC, /**< Error opening or reading input */
LZSA_ERROR_DST, /**< Error opening or writing output */
LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */
LZSA_ERROR_MEMORY, /**< Out of memory */
/* Compression-specific status codes */
LZSA_ERROR_COMPRESSION, /**< Internal compression error */
LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */
LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */
/* Decompression-specific status codes */
LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */
LZSA_ERROR_DECOMPRESSION, /**< Internal decompression error */
} lzsa_status_t;
/* Compression flags */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
/*-------------- Top level API -------------- */
/**
* Compress file
*
* @param pszInFilename name of input(source) file to compress
* @param pszOutFilename name of output(compressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
/**
* Decompress file
*
* @param pszInFilename name of input(compressed) file to decompress
* @param pszOutFilename name of output(decompressed) file to generate
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Streaming API -------------- */
/**
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param ppDictionaryData pointer to returned dictionary contents, set to NULL when no dictionary is loaded
* @param pDictionaryDataSize pointer to returned size of dictionary contents, set to 0 when no dictionary is loaded
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize);
/**
* Free dictionary contents
*
* @param ppDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData);
/**
* Compress stream
*
* @param pInStream input(source) stream to compress
* @param pOutStream output(compressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_xxx)
* @param nMinMatchSize minimum match size
* @param nFormatVersion version of format to use (1-2)
* @param progress progress function, called after compressing each block, or NULL for none
* @param pOriginalSize pointer to returned input(source) size, updated when this function is successful
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
/**
* Decompress stream
*
* @param pInStream input(compressed) stream to decompress
* @param pOutStream output(decompressed) stream to write to
* @param pDictionaryData dictionary contents, or NULL for none
* @param nDictionaryDataSize size of dictionary contents, or 0
* @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0)
* @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file
* @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful
* @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
/*-------------- Block compression API --------------*/
#define LCP_BITS 15
#define LCP_MAX (1<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
@ -61,6 +178,7 @@ typedef struct _lzsa_match {
unsigned short offset;
} lzsa_match;
/** One rep-match slot (for LZSA2) */
typedef struct _lzsa_repmatch_opt {
int incoming_offset;
short best_slot_for_incoming;
@ -114,7 +232,7 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor);
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
/**
* Get the number of compression commands issued in compressed data blocks
@ -134,6 +252,6 @@ int lzsa_compressor_get_command_count(lsza_compressor *pCompressor);
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
#endif /* _LIB_H */

View File

@ -39,11 +39,8 @@
#else
#include <sys/time.h>
#endif
#include "format.h"
#include "frame.h"
#include "lib.h"
#define BLOCK_SIZE 65536
#define OPT_VERBOSE 1
#define OPT_RAW 2
#define OPT_FAVOR_RATIO 4
@ -71,489 +68,92 @@ static long long do_get_time() {
/*---------------------------------------------------------------------------*/
/**
 * Progress callback for compression: prints the running sizes and ratio on a single,
 * continuously overwritten line of stdout. Nothing is printed until at least 1 MB of
 * input has been processed.
 *
 * @param nOriginalSize number of input(source) bytes processed so far
 * @param nCompressedSize number of output(compressed) bytes produced so far
 */
static void compression_progress(long long nOriginalSize, long long nCompressedSize) {
   const long long nMinReportedSize = 1024 * 1024;
   double fPercent;

   /* Stay quiet for small inputs */
   if (nOriginalSize < nMinReportedSize)
      return;

   fPercent = (double)(nCompressedSize * 100.0 / nOriginalSize);
   fprintf(stdout, "\r%lld => %lld (%g %%) \b\b\b\b\b", nOriginalSize, nCompressedSize, fPercent);
   fflush(stdout);
}
static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) {
FILE *f_in, *f_out;
unsigned char *pInData, *pOutData;
lsza_compressor compressor;
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nCommandCount = 0;
int nFlags;
int nResult;
unsigned char cFrameData[16];
bool bError = false;
f_in = fopen(pszInFilename, "rb");
if (!f_in) {
fprintf(stderr, "error opening '%s' for reading\n", pszInFilename);
return 100;
}
f_out = fopen(pszOutFilename, "wb");
if (!f_out) {
fprintf(stderr, "error opening '%s' for writing\n", pszOutFilename);
return 100;
}
pInData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pInData) {
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "out of memory\n");
return 100;
}
memset(pInData, 0, BLOCK_SIZE * 2);
pOutData = (unsigned char*)malloc(BLOCK_SIZE);
if (!pOutData) {
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "out of memory\n");
return 100;
}
memset(pOutData, 0, BLOCK_SIZE);
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *f_dictionary = fopen(pszDictionaryFilename, "rb");
if (!f_dictionary) {
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "error opening dictionary '%s' for reading\n", pszInFilename);
return 100;
}
fseek(f_dictionary, 0, SEEK_END);
#ifdef _WIN32
__int64 nDictionaryFileSize = _ftelli64(f_dictionary);
#else
off_t nDictionaryFileSize = ftello(f_dictionary);
#endif
if (nDictionaryFileSize > BLOCK_SIZE) {
/* Use the last BLOCK_SIZE bytes of the dictionary */
fseek(f_dictionary, -BLOCK_SIZE, SEEK_END);
}
else {
fseek(f_dictionary, 0, SEEK_SET);
}
nDictionaryDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_dictionary);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(f_dictionary);
f_dictionary = NULL;
}
lzsa_status_t nStatus;
nFlags = 0;
if (nOptions & OPT_FAVOR_RATIO)
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags);
if (nResult != 0) {
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "error initializing compressor\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion);
if (nHeaderSize < 0)
bError = true;
else {
bError = fwrite(cFrameData, 1, nHeaderSize, f_out) != nHeaderSize;
nCompressedSize += (long long)nHeaderSize;
}
}
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
}
int nPreviousBlockSize = 0;
int nNumBlocks = 0;
nStatus = lsza_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount);
if (nDictionaryDataSize)
nPreviousBlockSize = nDictionaryDataSize;
while (!feof(f_in) && !bError) {
int nInDataSize;
if (nPreviousBlockSize) {
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
}
nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in);
if (nInDataSize > 0) {
if ((nOptions & OPT_RAW) != 0 && nNumBlocks) {
fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n");
bError = true;
break;
}
nDictionaryDataSize = 0;
int nOutDataSize;
nOutDataSize = lzsa_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize);
if (nOutDataSize >= 0) {
/* Write compressed block */
if ((nOptions & OPT_RAW) == 0) {
int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize);
if (nBlockheaderSize < 0)
bError = true;
else {
nCompressedSize += (long long)nBlockheaderSize;
if (fwrite(cFrameData, 1, nBlockheaderSize, f_out) != (size_t)nBlockheaderSize) {
bError = true;
}
}
}
if (!bError) {
if (fwrite(pOutData, 1, (size_t)nOutDataSize, f_out) != (size_t)nOutDataSize) {
bError = true;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += (long long)nOutDataSize;
}
}
}
else {
/* Write uncompressible, literal block */
if ((nOptions & OPT_RAW) != 0) {
fprintf(stderr, "error: data is incompressible, raw blocks only support compressed data\n");
bError = true;
break;
}
int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize);
if (nBlockheaderSize < 0)
bError = true;
else {
if (fwrite(cFrameData, 1, nBlockheaderSize, f_out) != (size_t)nBlockheaderSize) {
bError = true;
}
else {
if (fwrite(pInData + BLOCK_SIZE, 1, (size_t)nInDataSize, f_out) != (size_t)nInDataSize) {
bError = true;
}
else {
nOriginalSize += (long long)nInDataSize;
nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize;
}
}
}
}
nPreviousBlockSize = nInDataSize;
nNumBlocks++;
}
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
fflush(stdout);
}
}
int nFooterSize;
if ((nOptions & OPT_RAW) != 0) {
nFooterSize = 0;
}
else {
nFooterSize = lzsa_encode_footer_frame(cFrameData, 16);
if (nFooterSize < 0)
bError = true;
}
if (!bError)
bError = fwrite(cFrameData, 1, nFooterSize, f_out) != nFooterSize;
nCompressedSize += (long long)nFooterSize;
if (!bError && (nOptions & OPT_VERBOSE)) {
if ((nOptions & OPT_VERBOSE)) {
nEndTime = do_get_time();
}
switch (nStatus) {
case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break;
case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break;
case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break;
case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break;
case LZSA_ERROR_COMPRESSION: fprintf(stderr, "internal compression error\n"); break;
case LZSA_ERROR_RAW_TOOLARGE: fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n"); break;
case LZSA_ERROR_RAW_UNCOMPRESSED: fprintf(stderr, "error: data is incompressible, raw blocks only support compressed data\n"); break;
case LZSA_OK: break;
default: fprintf(stderr, "unknown compression error %d\n", nStatus); break;
}
if (nStatus)
return 100;
if ((nOptions & OPT_VERBOSE)) {
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
int nCommands = lzsa_compressor_get_command_count(&compressor);
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
pszInFilename, fDelta, fSpeed, nCommandCount, (double)nOriginalSize / (double)nCommandCount,
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
}
lzsa_compressor_destroy(&compressor);
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
if (bError) {
fprintf(stderr, "\rcompression error for '%s'\n", pszInFilename);
return 100;
}
else {
return 0;
}
return 0;
}
/*---------------------------------------------------------------------------*/
static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
unsigned int nFileSize = 0;
unsigned char cFrameData[16];
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
lzsa_status_t nStatus;
int nFlags;
FILE *pInFile = fopen(pszInFilename, "rb");
if (!pInFile) {
fprintf(stderr, "error opening input file\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
const int nHeaderSize = lzsa_get_header_size();
memset(cFrameData, 0, 16);
if (fread(cFrameData, 1, nHeaderSize, pInFile) != nHeaderSize) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error reading header in input file\n");
return 100;
}
if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
return 100;
}
}
else {
fseek(pInFile, 0, SEEK_END);
nFileSize = (unsigned int)ftell(pInFile);
fseek(pInFile, 0, SEEK_SET);
if (nFileSize < 4) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid file size for raw block mode\n");
return 100;
}
}
FILE *pOutFile = fopen(pszOutFilename, "wb");
if (!pOutFile) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
unsigned char *pInBlock;
unsigned char *pOutData;
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
if (!pInBlock) {
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pOutData) {
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
if (!pDictionaryFile) {
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening dictionary file\n");
return 100;
}
fseek(pDictionaryFile, 0, SEEK_END);
#ifdef _WIN32
__int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
#else
off_t nDictionaryFileSize = ftello(pDictionaryFile);
#endif
if (nDictionaryFileSize > BLOCK_SIZE) {
/* Use the last BLOCK_SIZE bytes of the dictionary */
fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
}
else {
fseek(pDictionaryFile, 0, SEEK_SET);
}
nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE, pDictionaryFile);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(pDictionaryFile);
pDictionaryFile = NULL;
}
nFlags = 0;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
}
int nDecompressionError = 0;
int nPrevDecompressedSize = 0;
nStatus = lzsa_decompress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize);
if (nDictionaryDataSize) {
nPrevDecompressedSize = nDictionaryDataSize;
switch (nStatus) {
case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break;
case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break;
case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break;
case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break;
case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break;
case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break;
case LZSA_OK: break;
default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break;
}
while (!feof(pInFile) && !nDecompressionError) {
unsigned int nBlockSize = 0;
int nIsUncompressed = 0;
if (nPrevDecompressedSize != 0) {
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
}
if ((nOptions & OPT_RAW) == 0) {
const int nFrameSize = lzsa_get_frame_size();
memset(cFrameData, 0, 16);
if (fread(cFrameData, 1, nFrameSize, pInFile) == nFrameSize) {
if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
nDecompressionError = 1;
nBlockSize = 0;
}
}
else {
nBlockSize = 0;
}
}
else {
if (nFileSize >= 4)
nBlockSize = nFileSize - 4;
nFileSize = 0;
}
if (nBlockSize != 0) {
int nDecompressedSize = 0;
if ((int)nBlockSize > BLOCK_SIZE) {
fprintf(stderr, "block size %d > max size %d\n", nBlockSize, BLOCK_SIZE);
break;
}
if (fread(pInBlock, 1, nBlockSize, pInFile) == nBlockSize) {
if (nIsUncompressed) {
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
nDecompressedSize = nBlockSize;
}
else {
unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
}
}
if (nDecompressedSize != 0) {
nOriginalSize += (long long)nDecompressedSize;
fwrite(pOutData + BLOCK_SIZE, 1, nDecompressedSize, pOutFile);
nPrevDecompressedSize = nDecompressedSize;
nDecompressedSize = 0;
}
}
else {
break;
}
}
else {
break;
}
}
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
if (nDecompressionError) {
if (nStatus) {
fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
return 100;
}
@ -570,258 +170,141 @@ static int do_decompress(const char *pszInFilename, const char *pszOutFilename,
}
}
/*---------------------------------------------------------------------------*/
/* Private state of a comparison stream, stored in lzsa_stream_t::obj */
typedef struct {
   /* Reference file that written data is compared against */
   FILE *f;
   /* Scratch buffer holding the bytes read back from the reference file */
   void *pCompareDataBuf;
   /* Current capacity of pCompareDataBuf, in bytes */
   size_t nCompareDataSize;
} compare_stream_t;
/**
 * Close a comparison stream: release the scratch buffer, close the reference
 * file, free the stream state and clear every field of the stream.
 * Does nothing if the stream has no state attached.
 *
 * @param stream stream
 */
void comparestream_close(lzsa_stream_t *stream) {
   compare_stream_t *pState = (compare_stream_t *)stream->obj;

   if (pState == NULL)
      return;

   /* free(NULL) is a no-op, so the buffer can be released unconditionally */
   free(pState->pCompareDataBuf);
   pState->pCompareDataBuf = NULL;

   fclose(pState->f);
   free(pState);

   stream->obj = NULL;
   stream->read = NULL;
   stream->write = NULL;
   stream->eof = NULL;
   stream->close = NULL;
}
/**
 * Read from a comparison stream. This implementation never provides any
 * data: it ignores its arguments and always reports 0 bytes read.
 *
 * @param stream stream (unused)
 * @param ptr buffer to read into (unused)
 * @param size number of bytes to read (unused)
 *
 * @return always 0
 */
size_t comparestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
   (void)stream;
   (void)ptr;
   (void)size;

   return (size_t)0;
}
/**
 * "Write" to a comparison stream: instead of storing the data, read the same
 * number of bytes from the reference file and check that they are identical.
 *
 * @param stream stream
 * @param ptr data that would have been written
 * @param size number of bytes in ptr
 *
 * @return size if the reference file contains exactly the same bytes at the
 *         current position; 0 on mismatch, short read or allocation failure
 */
size_t comparestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
   compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj;

   /* Nothing to compare; also avoids realloc(..., 0), whose result is implementation-defined */
   if (size == 0)
      return 0;

   if (!pCompareStream->pCompareDataBuf || pCompareStream->nCompareDataSize < size) {
      /* Grow through a temporary so the existing buffer (and its recorded
       * size) stay valid, and get freed on close, if realloc() fails */
      void *pNewBuf = realloc(pCompareStream->pCompareDataBuf, size);
      if (!pNewBuf)
         return 0;

      pCompareStream->pCompareDataBuf = pNewBuf;
      pCompareStream->nCompareDataSize = size;
   }

   size_t nReadBytes = fread(pCompareStream->pCompareDataBuf, 1, size, pCompareStream->f);
   if (nReadBytes != size) {
      /* Reference file is shorter than the data being compared, or a read error occurred */
      return 0;
   }

   if (memcmp(ptr, pCompareStream->pCompareDataBuf, size)) {
      /* Contents differ */
      return 0;
   }

   return size;
}
/**
 * Check if the reference file of a comparison stream has reached its end
 *
 * @param stream stream
 *
 * @return the result of feof() on the reference file
 */
int comparestream_eof(lzsa_stream_t *stream) {
   FILE *pReferenceFile = ((compare_stream_t *)stream->obj)->f;
   const int nAtEnd = feof(pReferenceFile);

   return nAtEnd;
}
/**
 * Open a reference file and create a comparison stream from it. Data
 * "written" to the stream is compared against the contents of the file.
 *
 * @param stream stream to fill out; left untouched on failure
 * @param pszCompareFilename filename of the reference file
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, -1 if out of memory or the file cannot be opened
 */
int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, const char *pszMode) {
   compare_stream_t *pCompareStream;

   pCompareStream = (compare_stream_t*)malloc(sizeof(compare_stream_t));
   if (!pCompareStream)
      return -1;

   pCompareStream->pCompareDataBuf = NULL;
   pCompareStream->nCompareDataSize = 0;
   pCompareStream->f = fopen(pszCompareFilename, pszMode);

   if (!pCompareStream->f) {
      /* The state was never attached to the stream, so release it here
       * rather than leaking it */
      free(pCompareStream);
      return -1;
   }

   stream->obj = pCompareStream;
   stream->read = comparestream_read;
   stream->write = comparestream_write;
   stream->eof = comparestream_eof;
   stream->close = comparestream_close;
   return 0;
}
static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) {
lzsa_stream_t inStream, compareStream;
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
long long nKnownGoodSize = 0LL;
unsigned int nFileSize = 0;
unsigned char cFrameData[16];
long long nCompressedSize = 0LL;
void *pDictionaryData = NULL;
int nDictionaryDataSize = 0;
lzsa_status_t nStatus;
int nFlags;
FILE *pInFile = fopen(pszInFilename, "rb");
if (!pInFile) {
if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) {
fprintf(stderr, "error opening compressed input file\n");
return 100;
}
if ((nOptions & OPT_RAW) == 0) {
const int nHeaderSize = lzsa_get_header_size();
memset(cFrameData, 0, 16);
if (fread(cFrameData, 1, nHeaderSize, pInFile) != nHeaderSize) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error reading header in compressed input file\n");
return 100;
}
if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid magic number or format version in input file\n");
return 100;
}
}
else {
fseek(pInFile, 0, SEEK_END);
nFileSize = (unsigned int)ftell(pInFile);
fseek(pInFile, 0, SEEK_SET);
if (nFileSize < 4) {
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "invalid file size for raw block mode\n");
return 100;
}
}
FILE *pOutFile = fopen(pszOutFilename, "rb");
if (!pOutFile) {
fclose(pInFile);
pInFile = NULL;
if (comparestream_open(&compareStream, pszOutFilename, "rb") < 0) {
fprintf(stderr, "error opening original uncompressed file\n");
inStream.close(&inStream);
return 100;
}
unsigned char *pInBlock;
unsigned char *pOutData;
unsigned char *pCompareData;
pInBlock = (unsigned char*)malloc(BLOCK_SIZE);
if (!pInBlock) {
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize);
if (nStatus) {
compareStream.close(&compareStream);
inStream.close(&inStream);
fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename);
return 100;
}
pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2);
if (!pOutData) {
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
pCompareData = (unsigned char*)malloc(BLOCK_SIZE);
if (!pCompareData) {
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening output file\n");
return 100;
}
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
if (!pDictionaryFile) {
free(pCompareData);
pCompareData = NULL;
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening dictionary file\n");
return 100;
}
fseek(pDictionaryFile, 0, SEEK_END);
#ifdef _WIN32
__int64 nDictionaryFileSize = _ftelli64(pDictionaryFile);
#else
off_t nDictionaryFileSize = ftello(pDictionaryFile);
#endif
if (nDictionaryFileSize > BLOCK_SIZE) {
/* Use the last BLOCK_SIZE bytes of the dictionary */
fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END);
}
else {
fseek(pDictionaryFile, 0, SEEK_SET);
}
nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE, pDictionaryFile);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(pDictionaryFile);
pDictionaryFile = NULL;
}
nFlags = 0;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
if (nOptions & OPT_VERBOSE) {
nStartTime = do_get_time();
}
int nDecompressionError = 0;
bool bComparisonError = false;
int nPrevDecompressedSize = 0;
nStatus = lzsa_decompress_stream(&inStream, &compareStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize);
if (nDictionaryDataSize) {
nPrevDecompressedSize = nDictionaryDataSize;
switch (nStatus) {
case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break;
case LZSA_ERROR_DST: fprintf(stderr, "error comparing compressed file '%s' with original '%s'\n", pszInFilename, pszOutFilename); break;
case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break;
case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break;
case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break;
case LZSA_OK: break;
default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break;
}
while (!feof(pInFile) && !nDecompressionError && !bComparisonError) {
unsigned int nBlockSize = 0;
int nIsUncompressed = 0;
lzsa_dictionary_free(&pDictionaryData);
compareStream.close(&compareStream);
inStream.close(&inStream);
if (nPrevDecompressedSize != 0) {
memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize);
}
int nBytesToCompare = (int)fread(pCompareData, 1, BLOCK_SIZE, pOutFile);
if ((nOptions & OPT_RAW) == 0) {
const int nFrameSize = lzsa_get_frame_size();
memset(cFrameData, 0, 16);
if (fread(cFrameData, 1, nFrameSize, pInFile) == nFrameSize) {
if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) {
nDecompressionError = 1;
nBlockSize = 0;
}
}
else {
nBlockSize = 0;
}
}
else {
if (nFileSize >= 4)
nBlockSize = nFileSize - 4;
nFileSize = 0;
}
if (nBlockSize != 0) {
int nDecompressedSize = 0;
if ((int)nBlockSize > BLOCK_SIZE) {
fprintf(stderr, "block size %d > max size %d\n", nBlockSize, BLOCK_SIZE);
break;
}
if (fread(pInBlock, 1, nBlockSize, pInFile) == nBlockSize) {
if (nIsUncompressed) {
memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize);
nDecompressedSize = nBlockSize;
}
else {
unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
if (nDecompressedSize < 0) {
nDecompressionError = nDecompressedSize;
break;
}
}
if (nDecompressedSize == nBytesToCompare) {
nKnownGoodSize = nOriginalSize;
nOriginalSize += (long long)nDecompressedSize;
if (memcmp(pOutData + BLOCK_SIZE, pCompareData, nBytesToCompare))
bComparisonError = true;
nPrevDecompressedSize = nDecompressedSize;
nDecompressedSize = 0;
}
else {
bComparisonError = true;
break;
}
}
else {
break;
}
}
else {
break;
}
}
free(pCompareData);
pCompareData = NULL;
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
if (nDecompressionError) {
fprintf(stderr, "decompression error for '%s'\n", pszInFilename);
return 100;
}
else if (bComparisonError) {
fprintf(stderr, "error comparing compressed file '%s' with original '%s' starting at %lld\n", pszInFilename, pszOutFilename, nKnownGoodSize);
if (nStatus) {
return 100;
}
else {

111
src/stream.c Normal file
View File

@ -0,0 +1,111 @@
/*
* stream.c - streaming I/O implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "stream.h"
/**
 * Close a file stream and clear all of its fields. Does nothing if the
 * stream has no file attached.
 *
 * @param stream stream
 */
static void lzsa_filestream_close(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;

   if (pFile == NULL)
      return;

   fclose(pFile);

   stream->obj = NULL;
   stream->read = NULL;
   stream->write = NULL;
   stream->eof = NULL;
   stream->close = NULL;
}
/**
 * Read bytes from the file attached to a stream
 *
 * @param stream stream
 * @param ptr buffer to read into
 * @param size number of bytes to read
 *
 * @return number of bytes read, as reported by fread()
 */
static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   const size_t nBytesRead = fread(ptr, 1, size, pFile);

   return nBytesRead;
}
/**
 * Write bytes to the file attached to a stream
 *
 * @param stream stream
 * @param ptr buffer to write from
 * @param size number of bytes to write
 *
 * @return number of bytes written, as reported by fwrite()
 */
static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) {
   FILE *pFile = (FILE*)stream->obj;
   const size_t nBytesWritten = fwrite(ptr, 1, size, pFile);

   return nBytesWritten;
}
/**
 * Check whether the file attached to a stream has reached the end of its data
 *
 * @param stream stream
 *
 * @return nonzero if the end of the data has been reached, 0 if there is more data
 */
static int lzsa_filestream_eof(lzsa_stream_t *stream) {
   FILE *pFile = (FILE*)stream->obj;
   const int nAtEnd = feof(pFile);

   return nAtEnd;
}
/**
 * Open file and create an I/O stream from it
 *
 * On failure every field of the stream is set to NULL, so the stream is left
 * in a defined state instead of carrying uninitialized callbacks.
 *
 * @param stream stream to fill out
 * @param pszInFilename filename
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, -1 if the file cannot be opened
 */
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) {
   FILE *pFile = fopen(pszInFilename, pszMode);

   stream->obj = (void*)pFile;

   if (!pFile) {
      /* Don't leave garbage function pointers behind for callers that
       * inspect or clean up the stream after a failed open */
      stream->read = NULL;
      stream->write = NULL;
      stream->eof = NULL;
      stream->close = NULL;
      return -1;
   }

   stream->read = lzsa_filestream_read;
   stream->write = lzsa_filestream_write;
   stream->eof = lzsa_filestream_eof;
   stream->close = lzsa_filestream_close;
   return 0;
}

95
src/stream.h Normal file
View File

@ -0,0 +1,95 @@
/*
* stream.h - streaming I/O definitions
*
* Copyright (C) 2019 Emmanuel Marty
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must not
* claim that you wrote the original software. If you use this software
* in a product, an acknowledgment in the product documentation would be
* appreciated but is not required.
* 2. Altered source versions must be plainly marked as such, and must not be
* misrepresented as being the original software.
* 3. This notice may not be removed or altered from any source distribution.
*/
/*
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
*
*/
#ifndef _STREAM_H
#define _STREAM_H
/* size_t is used by the callback signatures below */
#include <stddef.h>

/* Forward declaration, so the callbacks can take a pointer to the stream itself */
typedef struct _lzsa_stream_t lzsa_stream_t;

/*
 * I/O stream: an opaque object plus the callbacks that operate on it.
 *
 * The struct is defined by tag only; the lzsa_stream_t typedef name is
 * declared once, above. Declaring the same typedef a second time is only
 * permitted from C11 onwards.
 */
struct _lzsa_stream_t {
   /** Opaque stream-specific pointer */
   void *obj;

   /**
    * Read from stream
    *
    * @param stream stream
    * @param ptr buffer to read into
    * @param size number of bytes to read
    *
    * @return number of bytes read
    */
   size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size);

   /**
    * Write to stream
    *
    * @param stream stream
    * @param ptr buffer to write from
    * @param size number of bytes to write
    *
    * @return number of bytes written
    */
   size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size);

   /**
    * Check if stream has reached the end of the data
    *
    * @param stream stream
    *
    * @return nonzero if the end of the data has been reached, 0 if there is more data
    */
   int(*eof)(lzsa_stream_t *stream);

   /**
    * Close stream
    *
    * @param stream stream
    */
   void(*close)(lzsa_stream_t *stream);
};
/**
 * Open file and create an I/O stream from it
 *
 * On success, the stream's obj pointer and its read, write, eof and close
 * callbacks are filled out; release the stream by calling its close callback.
 *
 * @param stream stream to fill out
 * @param pszInFilename filename
 * @param pszMode open mode, as with fopen()
 *
 * @return 0 for success, nonzero (-1) if the file cannot be opened
 */
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
#endif /* _STREAM_H */