From 277b5b1025166d999c508b2d9844a353e9e7e1af Mon Sep 17 00:00:00 2001 From: emmanuel-marty Date: Mon, 13 May 2019 22:22:53 +0200 Subject: [PATCH] Move top-level streaming compression code to library --- Makefile | 3 +- VS2017/lzsa.vcxproj | 2 + VS2017/lzsa.vcxproj.filters | 6 + src/expand_v1.c | 2 +- src/expand_v1.h | 2 +- src/expand_v2.c | 2 +- src/expand_v2.h | 2 +- src/lib.c | 524 ++++++++++++++++++++++- src/lib.h | 122 +++++- src/lzsa.c | 833 +++++++----------------------------- src/stream.c | 111 +++++ src/stream.h | 95 ++++ 12 files changed, 1018 insertions(+), 686 deletions(-) create mode 100644 src/stream.c create mode 100644 src/stream.h diff --git a/Makefile b/Makefile index a9d3601..ebfaf6d 100755 --- a/Makefile +++ b/Makefile @@ -11,8 +11,9 @@ $(OBJDIR)/%.o: src/../%.c APP := lzsa OBJS := $(OBJDIR)/src/lzsa.o -OBJS += $(OBJDIR)/src/frame.o OBJS += $(OBJDIR)/src/lib.o +OBJS += $(OBJDIR)/src/stream.o +OBJS += $(OBJDIR)/src/frame.o OBJS += $(OBJDIR)/src/matchfinder.o OBJS += $(OBJDIR)/src/shrink_v1.o OBJS += $(OBJDIR)/src/shrink_v2.o diff --git a/VS2017/lzsa.vcxproj b/VS2017/lzsa.vcxproj index 89325d5..3304589 100755 --- a/VS2017/lzsa.vcxproj +++ b/VS2017/lzsa.vcxproj @@ -188,6 +188,7 @@ + @@ -203,6 +204,7 @@ + diff --git a/VS2017/lzsa.vcxproj.filters b/VS2017/lzsa.vcxproj.filters index 511e8d4..e00658a 100755 --- a/VS2017/lzsa.vcxproj.filters +++ b/VS2017/lzsa.vcxproj.filters @@ -60,6 +60,9 @@ Fichiers sources + + Fichiers sources + @@ -98,5 +101,8 @@ Fichiers sources + + Fichiers sources + \ No newline at end of file diff --git a/src/expand_v1.c b/src/expand_v1.c index c19fb08..eaae559 100644 --- a/src/expand_v1.c +++ b/src/expand_v1.c @@ -179,7 +179,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v1(const unsigned char **p * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { +int 
lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { const unsigned char *pInBlockEnd = pInBlock + nBlockSize; const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; unsigned char *pCurOutData = pOutData + nOutDataOffset; diff --git a/src/expand_v1.h b/src/expand_v1.h index d6f44b9..ac801ca 100644 --- a/src/expand_v1.h +++ b/src/expand_v1.h @@ -44,6 +44,6 @@ * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); +int lzsa_decompressor_expand_block_v1(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); #endif /* _EXPAND_V1_H */ diff --git a/src/expand_v2.c b/src/expand_v2.c index 2c993e5..f2aaa46 100644 --- a/src/expand_v2.c +++ b/src/expand_v2.c @@ -184,7 +184,7 @@ static inline FORCE_INLINE int lzsa_expand_match_slow_v2(const unsigned char **p * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { +int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { const unsigned char *pInBlockEnd = pInBlock + nBlockSize; const unsigned char *pInBlockFastEnd = pInBlock + nBlockSize - 8; unsigned char *pCurOutData = pOutData + nOutDataOffset; diff --git a/src/expand_v2.h b/src/expand_v2.h index 906965c..e2c8fdb 100644 --- a/src/expand_v2.h +++ b/src/expand_v2.h @@ -44,6 +44,6 @@ * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block_v2(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); +int lzsa_decompressor_expand_block_v2(const 
unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); #endif /* _EXPAND_V2_H */ diff --git a/src/lib.c b/src/lib.c index 5a516b2..272eed6 100755 --- a/src/lib.c +++ b/src/lib.c @@ -40,6 +40,522 @@ #include "expand_v1.h" #include "expand_v2.h" #include "format.h" +#include "frame.h" + +#define BLOCK_SIZE 65536 + +/*-------------- Top level API -------------- */ + +/** + * Compress file + * + * @param pszInFilename name of input(source) file to compress + * @param pszOutFilename name of output(compressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) { + lzsa_stream_t inStream, outStream; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { + return LZSA_ERROR_SRC; + } + + if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) { + inStream.close(&inStream); + return 
LZSA_ERROR_DST; + } + + nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize); + + if (nStatus) { + outStream.close(&outStream); + inStream.close(&inStream); + return nStatus; + } + + nStatus = lsza_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount); + + lzsa_dictionary_free(&pDictionaryData); + outStream.close(&outStream); + inStream.close(&inStream); + return nStatus; +} + +/** + * Decompress file + * + * @param pszInFilename name of input(compressed) file to decompress + * @param pszOutFilename name of output(decompressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize) { + lzsa_stream_t inStream, outStream; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { + return LZSA_ERROR_SRC; + } + + if (lzsa_filestream_open(&outStream, pszOutFilename, "wb") < 0) { + inStream.close(&inStream); + return LZSA_ERROR_DST; + } + + nStatus = lzsa_dictionary_load(pszDictionaryFilename, 
&pDictionaryData, &nDictionaryDataSize); + if (nStatus) { + outStream.close(&outStream); + inStream.close(&inStream); + return nStatus; + } + + nStatus = lzsa_decompress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, pOriginalSize, pCompressedSize); + + lzsa_dictionary_free(&pDictionaryData); + outStream.close(&outStream); + inStream.close(&inStream); + + return nStatus; +} + +/*-------------- Streaming API -------------- */ + +/** + * Load dictionary contents + * + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param pDictionaryData pointer to returned dictionary contents, or NULL for none + * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize) { + unsigned char *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + + if (pszDictionaryFilename) { + pDictionaryData = (unsigned char *)malloc(BLOCK_SIZE); + if (!pDictionaryData) { + return LZSA_ERROR_MEMORY; + } + + FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); + if (!pDictionaryFile) { + free(pDictionaryData); + pDictionaryData = NULL; + return LZSA_ERROR_DICTIONARY; + } + + fseek(pDictionaryFile, 0, SEEK_END); +#ifdef _WIN32 + __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile); +#else + off_t nDictionaryFileSize = ftello(pDictionaryFile); +#endif + if (nDictionaryFileSize > BLOCK_SIZE) { + /* Use the last BLOCK_SIZE bytes of the dictionary */ + fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END); + } + else { + fseek(pDictionaryFile, 0, SEEK_SET); + } + + nDictionaryDataSize = (int)fread(pDictionaryData, 1, BLOCK_SIZE, pDictionaryFile); + if (nDictionaryDataSize < 0) + nDictionaryDataSize = 0; + + fclose(pDictionaryFile); + pDictionaryFile = NULL; + } + + *ppDictionaryData = pDictionaryData; + 
*pDictionaryDataSize = nDictionaryDataSize; + return LZSA_OK; +} + +/** + * Free dictionary contents + * + * @param ppDictionaryData pointer to pointer to dictionary contents; the pointed-to pointer is reset to NULL once freed + */ +void lzsa_dictionary_free(void **ppDictionaryData) { + if (*ppDictionaryData) { + free(*ppDictionaryData); + *ppDictionaryData = NULL; + } +} + +/** + * Compress stream + * + * @param pInStream input(source) stream to compress + * @param pOutStream output(compressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) { + unsigned char *pInData, *pOutData; + lsza_compressor compressor; + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + int nResult; + unsigned char cFrameData[16]; + int nError = 0; + + pInData = (unsigned char*)malloc(BLOCK_SIZE * 2); + if (!pInData) { + return LZSA_ERROR_MEMORY; + } + memset(pInData, 0, 
BLOCK_SIZE * 2); + + pOutData = (unsigned char*)malloc(BLOCK_SIZE); + if (!pOutData) { + free(pInData); + pInData = NULL; + + return LZSA_ERROR_MEMORY; + } + memset(pOutData, 0, BLOCK_SIZE); + + nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags); + if (nResult != 0) { + free(pOutData); + pOutData = NULL; + + free(pInData); + pInData = NULL; + + return LZSA_ERROR_MEMORY; + } + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion); + if (nHeaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + if (pOutStream->write(pOutStream, cFrameData, nHeaderSize) != nHeaderSize) + nError = LZSA_ERROR_DST; + nCompressedSize += (long long)nHeaderSize; + } + } + + int nPreviousBlockSize = 0; + int nNumBlocks = 0; + + while (!pInStream->eof(pInStream) && !nError) { + int nInDataSize; + + if (nPreviousBlockSize) { + memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize); + } + else if (nDictionaryDataSize && pDictionaryData) { + nPreviousBlockSize = nDictionaryDataSize; + memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pDictionaryData, nPreviousBlockSize); + } + + nInDataSize = (int)pInStream->read(pInStream, pInData + BLOCK_SIZE, BLOCK_SIZE); + if (nInDataSize > 0) { + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0 && nNumBlocks) { + nError = LZSA_ERROR_RAW_TOOLARGE; + break; + } + nDictionaryDataSize = 0; + + int nOutDataSize; + + nOutDataSize = lzsa_compressor_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? 
BLOCK_SIZE : nInDataSize); + if (nOutDataSize >= 0) { + /* Write compressed block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + nCompressedSize += (long long)nBlockheaderSize; + if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) { + nError = LZSA_ERROR_DST; + } + } + } + + if (!nError) { + if (pOutStream->write(pOutStream, pOutData, (size_t)nOutDataSize) != (size_t)nOutDataSize) { + nError = LZSA_ERROR_DST; + } + else { + nOriginalSize += (long long)nInDataSize; + nCompressedSize += (long long)nOutDataSize; + } + } + } + else { + /* Write uncompressible, literal block */ + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nError = LZSA_ERROR_RAW_UNCOMPRESSED; + break; + } + + int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize); + if (nBlockheaderSize < 0) + nError = LZSA_ERROR_COMPRESSION; + else { + if (pOutStream->write(pOutStream, cFrameData, nBlockheaderSize) != (size_t)nBlockheaderSize) { + nError = LZSA_ERROR_DST; + } + else { + if (pOutStream->write(pOutStream, pInData + BLOCK_SIZE, (size_t)nInDataSize) != (size_t)nInDataSize) { + nError = LZSA_ERROR_DST; + } + else { + nOriginalSize += (long long)nInDataSize; + nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize; + } + } + } + } + + nPreviousBlockSize = nInDataSize; + nNumBlocks++; + } + + if (!nError && !pInStream->eof(pInStream)) { + if (progress) + progress(nOriginalSize, nCompressedSize); + } + } + + if (!nError) { + int nFooterSize; + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) != 0) { + nFooterSize = 0; + } + else { + nFooterSize = lzsa_encode_footer_frame(cFrameData, 16); + if (nFooterSize < 0) + nError = LZSA_ERROR_COMPRESSION; + } + + if (!nError && pOutStream->write(pOutStream, cFrameData, nFooterSize) != nFooterSize) /* skip the write when footer encoding failed: nFooterSize is negative and the earlier error must be kept */ + nError = LZSA_ERROR_DST; + 
nCompressedSize += (long long)nFooterSize; + } + + if (progress) + progress(nOriginalSize, nCompressedSize); + + int nCommandCount = lzsa_compressor_get_command_count(&compressor); + lzsa_compressor_destroy(&compressor); + + free(pOutData); + pOutData = NULL; + + free(pInData); + pInData = NULL; + + if (nError) { + return nError; + } + else { + if (pOriginalSize) + *pOriginalSize = nOriginalSize; + if (pCompressedSize) + *pCompressedSize = nCompressedSize; + if (pCommandCount) + *pCommandCount = nCommandCount; + return LZSA_OK; + } +} + +/** + * Decompress stream + * + * @param pInStream input(compressed) stream to decompress + * @param pOutStream output(decompressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize) { + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + unsigned char cFrameData[16]; + unsigned char *pInBlock; + unsigned char *pOutData; + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + const int nHeaderSize = lzsa_get_header_size(); + + memset(cFrameData, 0, 16); + if (pInStream->read(pInStream, cFrameData, 
nHeaderSize) != nHeaderSize) { + return LZSA_ERROR_SRC; + } + + if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { + return LZSA_ERROR_FORMAT; + } + + nCompressedSize += (long long)nHeaderSize; + } + + pInBlock = (unsigned char*)malloc(BLOCK_SIZE); + if (!pInBlock) { + return LZSA_ERROR_MEMORY; + } + + pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2); + if (!pOutData) { + free(pInBlock); + pInBlock = NULL; + + return LZSA_ERROR_MEMORY; + } + + int nDecompressionError = 0; + int nPrevDecompressedSize = 0; + int nNumBlocks = 0; + + while (!pInStream->eof(pInStream) && !nDecompressionError) { + unsigned int nBlockSize = 0; + int nIsUncompressed = 0; + + if (nPrevDecompressedSize != 0) { + memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize); + } + else if (nDictionaryDataSize && pDictionaryData) { + nPrevDecompressedSize = nDictionaryDataSize; + memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pDictionaryData, nPrevDecompressedSize); + } + + if ((nFlags & LZSA_FLAG_RAW_BLOCK) == 0) { + const int nFrameSize = lzsa_get_frame_size(); + + memset(cFrameData, 0, 16); + if (pInStream->read(pInStream, cFrameData, nFrameSize) == nFrameSize) { + if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) { + nDecompressionError = LZSA_ERROR_FORMAT; + nBlockSize = 0; + } + + nCompressedSize += (long long)nFrameSize; + } + else { + nDecompressionError = LZSA_ERROR_SRC; + nBlockSize = 0; + } + } + else { + if (!nNumBlocks) + nBlockSize = BLOCK_SIZE; + else + nBlockSize = 0; + } + + if (nBlockSize != 0) { + int nDecompressedSize = 0; + + if ((int)nBlockSize > BLOCK_SIZE) { + nDecompressionError = LZSA_ERROR_FORMAT; + break; + } + size_t nReadBytes = pInStream->read(pInStream, pInBlock, nBlockSize); + if (nFlags & LZSA_FLAG_RAW_BLOCK) { + if (nReadBytes > 4) + nReadBytes -= 4; + else + nReadBytes = 0; + nBlockSize = nReadBytes; + } + + if (nReadBytes == nBlockSize) { + nCompressedSize += 
(long long)nReadBytes; + + if (nIsUncompressed) { + memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize); + nDecompressedSize = nBlockSize; + } + else { + unsigned int nBlockOffs = 0; + + nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); + if (nDecompressedSize < 0) { + nDecompressionError = LZSA_ERROR_DECOMPRESSION; + break; + } + } + + if (nDecompressedSize != 0) { + nOriginalSize += (long long)nDecompressedSize; + + if (pOutStream->write(pOutStream, pOutData + BLOCK_SIZE, nDecompressedSize) != nDecompressedSize) + nDecompressionError = LZSA_ERROR_DST; + nPrevDecompressedSize = nDecompressedSize; + nDecompressedSize = 0; + } + } + else { + break; + } + + nNumBlocks++; + } + else { + break; + } + } + + free(pOutData); + pOutData = NULL; + + free(pInBlock); + pInBlock = NULL; + + if (pOriginalSize) *pOriginalSize = nOriginalSize; /* output pointers are optional, as in lsza_compress_stream */ + if (pCompressedSize) *pCompressedSize = nCompressedSize; + return nDecompressionError; +} + +/*-------------- Block compression API --------------*/ /** * Initialize compression context @@ -168,7 +684,7 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor) { * * @return size of compressed data in output buffer, or -1 if the data is uncompressible */ -int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { +int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) return -1; if (nPreviousBlockSize) { @@ -207,11 +723,11 @@ int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) { * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int 
nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { +int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { if (nFormatVersion == 1) - return lzsa_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); else if (nFormatVersion == 2) - return lzsa_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); + return lzsa_decompressor_expand_block_v2(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); else return -1; } diff --git a/src/lib.h b/src/lib.h index 331b7cb..7296204 100755 --- a/src/lib.h +++ b/src/lib.h @@ -34,11 +34,128 @@ #define _LIB_H #include "divsufsort.h" +#include "stream.h" + +/** High level status for compression and decompression */ +typedef enum { + LZSA_OK = 0, /**< Success */ + LZSA_ERROR_SRC, /**< Error reading input */ + LZSA_ERROR_DST, /**< Error writing output */ + LZSA_ERROR_DICTIONARY, /**< Error reading dictionary */ + LZSA_ERROR_MEMORY, /**< Out of memory */ + + /* Compression-specific status codes */ + LZSA_ERROR_COMPRESSION, /**< Internal compression error */ + LZSA_ERROR_RAW_TOOLARGE, /**< Input is too large to be compressed to a raw block */ + LZSA_ERROR_RAW_UNCOMPRESSED, /**< Input is incompressible and raw blocks don't support uncompressed data */ + + /* Decompression-specific status codes */ + LZSA_ERROR_FORMAT, /**< Invalid input format or magic number when decompressing */ + LZSA_ERROR_DECOMPRESSION, /**< Internal decompression error */ +} lzsa_status_t; /* Compression flags */ #define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */ #define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */ +/*-------------- Top level API 
-------------- */ + +/** + * Compress file + * + * @param pszInFilename name of input(source) file to compress + * @param pszOutFilename name of output(compressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lsza_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); + +/** + * Decompress file + * + * @param pszInFilename name of input(compressed) file to decompress + * @param pszOutFilename name of output(decompressed) file to generate + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize); + +/*-------------- Streaming API -------------- */ + +/** + * Load dictionary contents + * + * @param pszDictionaryFilename name of dictionary file, or NULL for none + * @param pDictionaryData pointer to returned dictionary contents, or NULL for none + * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryData, int *pDictionaryDataSize); + +/** + * Free dictionary contents + * + * @param pDictionaryData pointer to pointer to dictionary contents + */ +void lzsa_dictionary_free(void **ppDictionaryData); + +/** + * Compress stream + * + * @param pInStream input(source) stream to compress + * @param pOutStream output(compressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_xxx) + * @param nMinMatchSize minimum match size + * @param nFormatVersion version of format to use (1-2) + * @param progress progress function, called after compressing each block, or NULL for none + * @param pOriginalSize pointer to returned input(source) size, updated when this function is successful + * @param pCompressedSize pointer to 
returned output(compressed) size, updated when this function is successful + * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lsza_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, + const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); + +/** + * Decompress stream + * + * @param pInStream input(compressed) stream to decompress + * @param pOutStream output(decompressed) stream to write to + * @param pDictionaryData dictionary contents, or NULL for none + * @param nDictionaryDataSize size of dictionary contents, or 0 + * @param nFlags compression flags (LZSA_FLAG_RAW_BLOCK to decompress a raw block, or 0) + * @param nFormatVersion default version of format to use (1-2). 
This is used when decompressing a raw block, otherwise the version is extracted from the source file + * @param pOriginalSize pointer to returned output(decompressed) size, updated when this function is successful + * @param pCompressedSize pointer to returned input(compressed) size, updated when this function is successful + * + * @return LZSA_OK for success, or an error value from lzsa_status_t + */ +lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, + long long *pOriginalSize, long long *pCompressedSize); + +/*-------------- Block compression API --------------*/ + #define LCP_BITS 15 #define LCP_MAX (1<<(LCP_BITS - 1)) #define LCP_SHIFT (32-LCP_BITS) @@ -61,6 +178,7 @@ typedef struct _lzsa_match { unsigned short offset; } lzsa_match; +/** One rep-match slot (for LZSA2) */ typedef struct _lzsa_repmatch_opt { int incoming_offset; short best_slot_for_incoming; @@ -114,7 +232,7 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor); * * @return size of compressed data in output buffer, or -1 if the data is uncompressible */ -int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); +int lzsa_compressor_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); /** * Get the number of compression commands issued in compressed data blocks @@ -134,6 +252,6 @@ int lzsa_compressor_get_command_count(lsza_compressor *pCompressor); * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); +int 
lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); #endif /* _LIB_H */ diff --git a/src/lzsa.c b/src/lzsa.c index 5069bbc..d84d4d7 100755 --- a/src/lzsa.c +++ b/src/lzsa.c @@ -39,11 +39,8 @@ #else #include #endif -#include "format.h" -#include "frame.h" #include "lib.h" -#define BLOCK_SIZE 65536 #define OPT_VERBOSE 1 #define OPT_RAW 2 #define OPT_FAVOR_RATIO 4 @@ -71,489 +68,92 @@ static long long do_get_time() { /*---------------------------------------------------------------------------*/ +static void compression_progress(long long nOriginalSize, long long nCompressedSize) { + if (nOriginalSize >= 1024 * 1024) { + fprintf(stdout, "\r%lld => %lld (%g %%) \b\b\b\b\b", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); + fflush(stdout); + } +} + static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) { - FILE *f_in, *f_out; - unsigned char *pInData, *pOutData; - lsza_compressor compressor; long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL, nCompressedSize = 0LL; + int nCommandCount = 0; int nFlags; - int nResult; - unsigned char cFrameData[16]; - bool bError = false; - - f_in = fopen(pszInFilename, "rb"); - if (!f_in) { - fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); - return 100; - } - - f_out = fopen(pszOutFilename, "wb"); - if (!f_out) { - fprintf(stderr, "error opening '%s' for writing\n", pszOutFilename); - return 100; - } - - pInData = (unsigned char*)malloc(BLOCK_SIZE * 2); - if (!pInData) { - fclose(f_out); - f_out = NULL; - - fclose(f_in); - f_in = NULL; - - fprintf(stderr, "out of memory\n"); - return 100; - } - memset(pInData, 0, BLOCK_SIZE * 2); - - pOutData = (unsigned char*)malloc(BLOCK_SIZE); - if (!pOutData) { - 
free(pInData); - pInData = NULL; - - fclose(f_out); - f_out = NULL; - - fclose(f_in); - f_in = NULL; - - fprintf(stderr, "out of memory\n"); - return 100; - } - memset(pOutData, 0, BLOCK_SIZE); - - int nDictionaryDataSize = 0; - - if (pszDictionaryFilename) { - FILE *f_dictionary = fopen(pszDictionaryFilename, "rb"); - if (!f_dictionary) { - free(pOutData); - pOutData = NULL; - - free(pInData); - pInData = NULL; - - fclose(f_out); - f_out = NULL; - - fclose(f_in); - f_in = NULL; - - fprintf(stderr, "error opening dictionary '%s' for reading\n", pszInFilename); - return 100; - } - - fseek(f_dictionary, 0, SEEK_END); -#ifdef _WIN32 - __int64 nDictionaryFileSize = _ftelli64(f_dictionary); -#else - off_t nDictionaryFileSize = ftello(f_dictionary); -#endif - if (nDictionaryFileSize > BLOCK_SIZE) { - /* Use the last BLOCK_SIZE bytes of the dictionary */ - fseek(f_dictionary, -BLOCK_SIZE, SEEK_END); - } - else { - fseek(f_dictionary, 0, SEEK_SET); - } - - nDictionaryDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_dictionary); - if (nDictionaryDataSize < 0) - nDictionaryDataSize = 0; - - fclose(f_dictionary); - f_dictionary = NULL; - } + lzsa_status_t nStatus; nFlags = 0; if (nOptions & OPT_FAVOR_RATIO) nFlags |= LZSA_FLAG_FAVOR_RATIO; if (nOptions & OPT_RAW) nFlags |= LZSA_FLAG_RAW_BLOCK; - nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFormatVersion, nFlags); - if (nResult != 0) { - free(pOutData); - pOutData = NULL; - - free(pInData); - pInData = NULL; - - fclose(f_out); - f_out = NULL; - - fclose(f_in); - f_in = NULL; - - fprintf(stderr, "error initializing compressor\n"); - return 100; - } - - if ((nOptions & OPT_RAW) == 0) { - int nHeaderSize = lzsa_encode_header(cFrameData, 16, nFormatVersion); - if (nHeaderSize < 0) - bError = true; - else { - bError = fwrite(cFrameData, 1, nHeaderSize, f_out) != nHeaderSize; - nCompressedSize += (long long)nHeaderSize; - } - } if (nOptions & OPT_VERBOSE) { nStartTime = do_get_time(); } - 
int nPreviousBlockSize = 0; - int nNumBlocks = 0; + nStatus = lsza_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount); - if (nDictionaryDataSize) - nPreviousBlockSize = nDictionaryDataSize; - - while (!feof(f_in) && !bError) { - int nInDataSize; - - if (nPreviousBlockSize) { - memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize); - } - - nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in); - if (nInDataSize > 0) { - if ((nOptions & OPT_RAW) != 0 && nNumBlocks) { - fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n"); - bError = true; - break; - } - nDictionaryDataSize = 0; - - int nOutDataSize; - - nOutDataSize = lzsa_shrink_block(&compressor, pInData + BLOCK_SIZE - nPreviousBlockSize, nPreviousBlockSize, nInDataSize, pOutData, (nInDataSize >= BLOCK_SIZE) ? BLOCK_SIZE : nInDataSize); - if (nOutDataSize >= 0) { - /* Write compressed block */ - - if ((nOptions & OPT_RAW) == 0) { - int nBlockheaderSize = lzsa_encode_compressed_block_frame(cFrameData, 16, nOutDataSize); - if (nBlockheaderSize < 0) - bError = true; - else { - nCompressedSize += (long long)nBlockheaderSize; - if (fwrite(cFrameData, 1, nBlockheaderSize, f_out) != (size_t)nBlockheaderSize) { - bError = true; - } - } - } - - if (!bError) { - if (fwrite(pOutData, 1, (size_t)nOutDataSize, f_out) != (size_t)nOutDataSize) { - bError = true; - } - else { - nOriginalSize += (long long)nInDataSize; - nCompressedSize += (long long)nOutDataSize; - } - } - } - else { - /* Write uncompressible, literal block */ - - if ((nOptions & OPT_RAW) != 0) { - fprintf(stderr, "error: data is incompressible, raw blocks only support compressed data\n"); - bError = true; - break; - } - - int nBlockheaderSize = lzsa_encode_uncompressed_block_frame(cFrameData, 16, nInDataSize); - if (nBlockheaderSize < 0) - bError = true; - else { 
- if (fwrite(cFrameData, 1, nBlockheaderSize, f_out) != (size_t)nBlockheaderSize) { - bError = true; - } - else { - if (fwrite(pInData + BLOCK_SIZE, 1, (size_t)nInDataSize, f_out) != (size_t)nInDataSize) { - bError = true; - } - else { - nOriginalSize += (long long)nInDataSize; - nCompressedSize += (long long)nBlockheaderSize + (long long)nInDataSize; - } - } - } - } - - nPreviousBlockSize = nInDataSize; - nNumBlocks++; - } - - if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) { - fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); - fflush(stdout); - } - } - - int nFooterSize; - - if ((nOptions & OPT_RAW) != 0) { - nFooterSize = 0; - } - else { - nFooterSize = lzsa_encode_footer_frame(cFrameData, 16); - if (nFooterSize < 0) - bError = true; - } - - if (!bError) - bError = fwrite(cFrameData, 1, nFooterSize, f_out) != nFooterSize; - nCompressedSize += (long long)nFooterSize; - - if (!bError && (nOptions & OPT_VERBOSE)) { + if ((nOptions & OPT_VERBOSE)) { nEndTime = do_get_time(); + } + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break; + case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break; + case LZSA_ERROR_COMPRESSION: fprintf(stderr, "internal compression error\n"); break; + case LZSA_ERROR_RAW_TOOLARGE: fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n"); break; + case LZSA_ERROR_RAW_UNCOMPRESSED: fprintf(stderr, "error: data is incompressible, raw blocks only support compressed data\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown compression error %d\n", nStatus); break; + } + + if (nStatus) + return 100; + + if ((nOptions & OPT_VERBOSE)) { double fDelta = 
((double)(nEndTime - nStartTime)) / 1000000.0; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; - int nCommands = lzsa_compressor_get_command_count(&compressor); fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n", - pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands, + pszInFilename, fDelta, fSpeed, nCommandCount, (double)nOriginalSize / (double)nCommandCount, nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); } - lzsa_compressor_destroy(&compressor); - - free(pOutData); - pOutData = NULL; - - free(pInData); - pInData = NULL; - - fclose(f_out); - f_out = NULL; - - fclose(f_in); - f_in = NULL; - - if (bError) { - fprintf(stderr, "\rcompression error for '%s'\n", pszInFilename); - return 100; - } - else { - return 0; - } + return 0; } /*---------------------------------------------------------------------------*/ static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { long long nStartTime = 0LL, nEndTime = 0LL; - long long nOriginalSize = 0LL; - unsigned int nFileSize = 0; - unsigned char cFrameData[16]; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + lzsa_status_t nStatus; + int nFlags; - FILE *pInFile = fopen(pszInFilename, "rb"); - if (!pInFile) { - fprintf(stderr, "error opening input file\n"); - return 100; - } - - if ((nOptions & OPT_RAW) == 0) { - const int nHeaderSize = lzsa_get_header_size(); - - memset(cFrameData, 0, 16); - if (fread(cFrameData, 1, nHeaderSize, pInFile) != nHeaderSize) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error reading header in input file\n"); - return 100; - } - - if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "invalid magic number or format version in input file\n"); - 
return 100; - } - } - else { - fseek(pInFile, 0, SEEK_END); - nFileSize = (unsigned int)ftell(pInFile); - fseek(pInFile, 0, SEEK_SET); - - if (nFileSize < 4) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "invalid file size for raw block mode\n"); - return 100; - } - } - - FILE *pOutFile = fopen(pszOutFilename, "wb"); - if (!pOutFile) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); - return 100; - } - - unsigned char *pInBlock; - unsigned char *pOutData; - - pInBlock = (unsigned char*)malloc(BLOCK_SIZE); - if (!pInBlock) { - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); - return 100; - } - - pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2); - if (!pOutData) { - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); - return 100; - } - - int nDictionaryDataSize = 0; - if (pszDictionaryFilename) { - FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); - if (!pDictionaryFile) { - free(pOutData); - pOutData = NULL; - - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - - fprintf(stderr, "error opening dictionary file\n"); - return 100; - } - - fseek(pDictionaryFile, 0, SEEK_END); -#ifdef _WIN32 - __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile); -#else - off_t nDictionaryFileSize = ftello(pDictionaryFile); -#endif - if (nDictionaryFileSize > BLOCK_SIZE) { - /* Use the last BLOCK_SIZE bytes of the dictionary */ - fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END); - } - else { - fseek(pDictionaryFile, 0, SEEK_SET); - } - - nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE, pDictionaryFile); - if (nDictionaryDataSize < 0) - nDictionaryDataSize = 0; - - fclose(pDictionaryFile); - pDictionaryFile = NULL; - } + nFlags = 0; + if (nOptions & 
OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; if (nOptions & OPT_VERBOSE) { nStartTime = do_get_time(); } - int nDecompressionError = 0; - int nPrevDecompressedSize = 0; + nStatus = lzsa_decompress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize); - if (nDictionaryDataSize) { - nPrevDecompressedSize = nDictionaryDataSize; + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break; + case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break; + case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break; + case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break; } - while (!feof(pInFile) && !nDecompressionError) { - unsigned int nBlockSize = 0; - int nIsUncompressed = 0; - - if (nPrevDecompressedSize != 0) { - memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize); - } - - if ((nOptions & OPT_RAW) == 0) { - const int nFrameSize = lzsa_get_frame_size(); - - memset(cFrameData, 0, 16); - if (fread(cFrameData, 1, nFrameSize, pInFile) == nFrameSize) { - if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) { - nDecompressionError = 1; - nBlockSize = 0; - } - } - else { - nBlockSize = 0; - } - } - else { - if (nFileSize >= 4) - nBlockSize = nFileSize - 4; - nFileSize = 0; - } - - if (nBlockSize != 0) { - int nDecompressedSize = 0; - - if ((int)nBlockSize > BLOCK_SIZE) { - fprintf(stderr, "block size %d > max size %d\n", nBlockSize, BLOCK_SIZE); - break; - } - if (fread(pInBlock, 1, nBlockSize, pInFile) == 
nBlockSize) { - if (nIsUncompressed) { - memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize); - nDecompressedSize = nBlockSize; - } - else { - unsigned int nBlockOffs = 0; - - nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); - if (nDecompressedSize < 0) { - nDecompressionError = nDecompressedSize; - break; - } - } - - if (nDecompressedSize != 0) { - nOriginalSize += (long long)nDecompressedSize; - - fwrite(pOutData + BLOCK_SIZE, 1, nDecompressedSize, pOutFile); - nPrevDecompressedSize = nDecompressedSize; - nDecompressedSize = 0; - } - } - else { - break; - } - } - else { - break; - } - } - - free(pOutData); - pOutData = NULL; - - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - - if (nDecompressionError) { + if (nStatus) { fprintf(stderr, "decompression error for '%s'\n", pszInFilename); return 100; } @@ -570,258 +170,141 @@ static int do_decompress(const char *pszInFilename, const char *pszOutFilename, } } +/*---------------------------------------------------------------------------*/ + +typedef struct { + FILE *f; + void *pCompareDataBuf; + size_t nCompareDataSize; +} compare_stream_t; + +void comparestream_close(lzsa_stream_t *stream) { + if (stream->obj) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + if (pCompareStream->pCompareDataBuf) { + free(pCompareStream->pCompareDataBuf); + pCompareStream->pCompareDataBuf = NULL; + } + + fclose(pCompareStream->f); + free(pCompareStream); + + stream->obj = NULL; + stream->read = NULL; + stream->write = NULL; + stream->eof = NULL; + stream->close = NULL; + } +} + +size_t comparestream_read(lzsa_stream_t *stream, void *ptr, size_t size) { + return 0; +} + +size_t comparestream_write(lzsa_stream_t *stream, void *ptr, size_t size) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + + if (!pCompareStream->pCompareDataBuf || 
pCompareStream->nCompareDataSize < size) { + pCompareStream->nCompareDataSize = size; + pCompareStream->pCompareDataBuf = realloc(pCompareStream->pCompareDataBuf, pCompareStream->nCompareDataSize); + if (!pCompareStream->pCompareDataBuf) + return 0; + } + + size_t nReadBytes = fread(pCompareStream->pCompareDataBuf, 1, size, pCompareStream->f); + if (nReadBytes != size) { + return 0; + } + + if (memcmp(ptr, pCompareStream->pCompareDataBuf, size)) { + return 0; + } + + return size; +} + +int comparestream_eof(lzsa_stream_t *stream) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + return feof(pCompareStream->f); +} + +int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, const char *pszMode) { + compare_stream_t *pCompareStream; + + pCompareStream = (compare_stream_t*)malloc(sizeof(compare_stream_t)); + if (!pCompareStream) + return -1; + + pCompareStream->pCompareDataBuf = NULL; + pCompareStream->nCompareDataSize = 0; + pCompareStream->f = (void*)fopen(pszCompareFilename, pszMode); + + if (pCompareStream->f) { + stream->obj = pCompareStream; + stream->read = comparestream_read; + stream->write = comparestream_write; + stream->eof = comparestream_eof; + stream->close = comparestream_close; + return 0; + } + else + return -1; +} + static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { + lzsa_stream_t inStream, compareStream; long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL; - long long nKnownGoodSize = 0LL; - unsigned int nFileSize = 0; - unsigned char cFrameData[16]; + long long nCompressedSize = 0LL; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + int nFlags; - FILE *pInFile = fopen(pszInFilename, "rb"); - if (!pInFile) { + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { fprintf(stderr, "error opening compressed input file\n"); 
return 100; } - if ((nOptions & OPT_RAW) == 0) { - const int nHeaderSize = lzsa_get_header_size(); - - memset(cFrameData, 0, 16); - if (fread(cFrameData, 1, nHeaderSize, pInFile) != nHeaderSize) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error reading header in compressed input file\n"); - return 100; - } - - if (lzsa_decode_header(cFrameData, nHeaderSize, &nFormatVersion) < 0) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "invalid magic number or format version in input file\n"); - return 100; - } - } - else { - fseek(pInFile, 0, SEEK_END); - nFileSize = (unsigned int)ftell(pInFile); - fseek(pInFile, 0, SEEK_SET); - - if (nFileSize < 4) { - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "invalid file size for raw block mode\n"); - return 100; - } - } - - FILE *pOutFile = fopen(pszOutFilename, "rb"); - if (!pOutFile) { - fclose(pInFile); - pInFile = NULL; + if (comparestream_open(&compareStream, pszOutFilename, "rb") < 0) { fprintf(stderr, "error opening original uncompressed file\n"); + inStream.close(&inStream); return 100; } - unsigned char *pInBlock; - unsigned char *pOutData; - unsigned char *pCompareData; - - pInBlock = (unsigned char*)malloc(BLOCK_SIZE); - if (!pInBlock) { - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); + nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize); + if (nStatus) { + compareStream.close(&compareStream); + inStream.close(&inStream); + fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); return 100; } - pOutData = (unsigned char*)malloc(BLOCK_SIZE * 2); - if (!pOutData) { - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); - return 100; - } - - pCompareData = (unsigned char*)malloc(BLOCK_SIZE); - if (!pCompareData) { - free(pOutData); - pOutData = 
NULL; - - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - fprintf(stderr, "error opening output file\n"); - return 100; - } - - int nDictionaryDataSize = 0; - if (pszDictionaryFilename) { - FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); - if (!pDictionaryFile) { - free(pCompareData); - pCompareData = NULL; - - free(pOutData); - pOutData = NULL; - - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - - fprintf(stderr, "error opening dictionary file\n"); - return 100; - } - - fseek(pDictionaryFile, 0, SEEK_END); -#ifdef _WIN32 - __int64 nDictionaryFileSize = _ftelli64(pDictionaryFile); -#else - off_t nDictionaryFileSize = ftello(pDictionaryFile); -#endif - if (nDictionaryFileSize > BLOCK_SIZE) { - /* Use the last BLOCK_SIZE bytes of the dictionary */ - fseek(pDictionaryFile, -BLOCK_SIZE, SEEK_END); - } - else { - fseek(pDictionaryFile, 0, SEEK_SET); - } - - nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE, pDictionaryFile); - if (nDictionaryDataSize < 0) - nDictionaryDataSize = 0; - - fclose(pDictionaryFile); - pDictionaryFile = NULL; - } + nFlags = 0; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; if (nOptions & OPT_VERBOSE) { nStartTime = do_get_time(); } - int nDecompressionError = 0; - bool bComparisonError = false; - int nPrevDecompressedSize = 0; + nStatus = lzsa_decompress_stream(&inStream, &compareStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize); - if (nDictionaryDataSize) { - nPrevDecompressedSize = nDictionaryDataSize; + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error comparing compressed file '%s' with original '%s'\n", pszInFilename, pszOutFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); 
break; + case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break; + case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break; } - while (!feof(pInFile) && !nDecompressionError && !bComparisonError) { - unsigned int nBlockSize = 0; - int nIsUncompressed = 0; + lzsa_dictionary_free(&pDictionaryData); + compareStream.close(&compareStream); + inStream.close(&inStream); - if (nPrevDecompressedSize != 0) { - memcpy(pOutData + BLOCK_SIZE - nPrevDecompressedSize, pOutData + BLOCK_SIZE, nPrevDecompressedSize); - } - - int nBytesToCompare = (int)fread(pCompareData, 1, BLOCK_SIZE, pOutFile); - - if ((nOptions & OPT_RAW) == 0) { - const int nFrameSize = lzsa_get_frame_size(); - - memset(cFrameData, 0, 16); - if (fread(cFrameData, 1, nFrameSize, pInFile) == nFrameSize) { - if (lzsa_decode_frame(cFrameData, nFrameSize, &nBlockSize, &nIsUncompressed) < 0) { - nDecompressionError = 1; - nBlockSize = 0; - } - } - else { - nBlockSize = 0; - } - } - else { - if (nFileSize >= 4) - nBlockSize = nFileSize - 4; - nFileSize = 0; - } - - if (nBlockSize != 0) { - int nDecompressedSize = 0; - - if ((int)nBlockSize > BLOCK_SIZE) { - fprintf(stderr, "block size %d > max size %d\n", nBlockSize, BLOCK_SIZE); - break; - } - if (fread(pInBlock, 1, nBlockSize, pInFile) == nBlockSize) { - if (nIsUncompressed) { - memcpy(pOutData + BLOCK_SIZE, pInBlock, nBlockSize); - nDecompressedSize = nBlockSize; - } - else { - unsigned int nBlockOffs = 0; - - nDecompressedSize = lzsa_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); - if (nDecompressedSize < 0) { - nDecompressionError = nDecompressedSize; - break; - } - } - - if (nDecompressedSize == nBytesToCompare) { - nKnownGoodSize = nOriginalSize; - - nOriginalSize += (long long)nDecompressedSize; - - if (memcmp(pOutData + BLOCK_SIZE, 
pCompareData, nBytesToCompare)) - bComparisonError = true; - nPrevDecompressedSize = nDecompressedSize; - nDecompressedSize = 0; - } - else { - bComparisonError = true; - break; - } - } - else { - break; - } - } - else { - break; - } - } - - free(pCompareData); - pCompareData = NULL; - - free(pOutData); - pOutData = NULL; - - free(pInBlock); - pInBlock = NULL; - - fclose(pOutFile); - pOutFile = NULL; - - fclose(pInFile); - pInFile = NULL; - - if (nDecompressionError) { - fprintf(stderr, "decompression error for '%s'\n", pszInFilename); - return 100; - } - else if (bComparisonError) { - fprintf(stderr, "error comparing compressed file '%s' with original '%s' starting at %lld\n", pszInFilename, pszOutFilename, nKnownGoodSize); + if (nStatus) { return 100; } else { diff --git a/src/stream.c b/src/stream.c new file mode 100644 index 0000000..8937487 --- /dev/null +++ b/src/stream.c @@ -0,0 +1,111 @@ +/* + * stream.c - streaming I/O implementation + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. 
https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#include "stream.h" + +/** + * Close file stream + * + * @param stream stream + */ +static void lzsa_filestream_close(lzsa_stream_t *stream) { + if (stream->obj) { + fclose((FILE*)stream->obj); + stream->obj = NULL; + stream->read = NULL; + stream->write = NULL; + stream->eof = NULL; + stream->close = NULL; + } +} + +/** + * Read from file stream + * + * @param stream stream + * @param ptr buffer to read into + * @param size number of bytes to read + * + * @return number of bytes read + */ +static size_t lzsa_filestream_read(lzsa_stream_t *stream, void *ptr, size_t size) { + return fread(ptr, 1, size, (FILE*)stream->obj); +} + +/** + * Write to file stream + * + * @param stream stream + * @param ptr buffer to write from + * @param size number of bytes to write + * + * @return number of bytes written + */ +static size_t lzsa_filestream_write(lzsa_stream_t *stream, void *ptr, size_t size) { + return fwrite(ptr, 1, size, (FILE*)stream->obj); +} + +/** + * Check if file stream has reached the end of the data + * + * @param stream stream + * + * @return nonzero if the end of the data has been reached, 0 if there is more data + */ +static int lzsa_filestream_eof(lzsa_stream_t *stream) { + return feof((FILE*)stream->obj); +} + +/** + * Open file and create an I/O stream from it + * + * @param stream stream to fill out + * @param pszInFilename filename + * @param pszMode open mode, as with fopen() + * + * @return 0 for success, nonzero for failure + */ +int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode) { + stream->obj = (void*)fopen(pszInFilename, pszMode); + if (stream->obj) { + stream->read = 
lzsa_filestream_read; + stream->write = lzsa_filestream_write; + stream->eof = lzsa_filestream_eof; + stream->close = lzsa_filestream_close; + return 0; + } + else + return -1; +} diff --git a/src/stream.h b/src/stream.h new file mode 100644 index 0000000..fb305f7 --- /dev/null +++ b/src/stream.h @@ -0,0 +1,95 @@ +/* + * stream.h - streaming I/O definitions + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. 
https://create.stephan-brumme.com/smallz4/ + * + */ + +#ifndef _STREAM_H +#define _STREAM_H + +/* Forward declaration */ +typedef struct _lzsa_stream_t lzsa_stream_t; + +/* I/O stream */ +typedef struct _lzsa_stream_t { + /** Opaque stream-specific pointer */ + void *obj; + + /** + * Read from stream + * + * @param stream stream + * @param ptr buffer to read into + * @param size number of bytes to read + * + * @return number of bytes read + */ + size_t(*read)(lzsa_stream_t *stream, void *ptr, size_t size); + + /** + * Write to stream + * + * @param stream stream + * @param ptr buffer to write from + * @param size number of bytes to write + * + * @return number of bytes written + */ + size_t(*write)(lzsa_stream_t *stream, void *ptr, size_t size); + + + /** + * Check if stream has reached the end of the data + * + * @param stream stream + * + * @return nonzero if the end of the data has been reached, 0 if there is more data + */ + int(*eof)(lzsa_stream_t *stream); + + /** + * Close stream + * + * @param stream stream + */ + void(*close)(lzsa_stream_t *stream); +} lzsa_stream_t; + +/** + * Open file and create an I/O stream from it + * + * @param stream stream to fill out + * @param pszInFilename filename + * @param pszMode open mode, as with fopen() + * + * @return 0 for success, nonzero for failure + */ +int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode); + +#endif /* _STREAM_H */