diff --git a/lzsa.c b/lzsa.c new file mode 100644 index 0000000..c456081 --- /dev/null +++ b/lzsa.c @@ -0,0 +1,1109 @@ +/* + * lzsa.c - command line compression utility for the LZSA format + * + * Copyright (C) 2019 Emmanuel Marty + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgment in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori + * + * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 + * With help, ideas, optimizations and speed measurements by spke + * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/ + * + */ + +#include +#include +#include +#ifdef _WIN32 +#include +#include +#else +#include +#endif +#include "lib.h" + +#define OPT_VERBOSE 1 +#define OPT_RAW 2 +#define OPT_FAVOR_RATIO 4 +#define OPT_RAW_BACKWARD 8 +#define OPT_STATS 16 + +#define TOOL_VERSION "1.3.4" + +/*---------------------------------------------------------------------------*/ + +#ifdef _WIN32 +LARGE_INTEGER hpc_frequency; +BOOL hpc_available = FALSE; +#endif + +static void do_init_time() { +#ifdef _WIN32 + hpc_frequency.QuadPart = 0; + hpc_available = QueryPerformanceFrequency(&hpc_frequency); +#endif +} + +static long long do_get_time() { + long long nTime; + +#ifdef _WIN32 + if (hpc_available) { + LARGE_INTEGER nCurTime; + + /* Use HPC hardware for best precision */ + QueryPerformanceCounter(&nCurTime); + nTime = (long long)(nCurTime.QuadPart * 1000000LL / hpc_frequency.QuadPart); + } + else { + struct _timeb tb; + _ftime(&tb); + + nTime = ((long long)tb.time * 1000LL + (long long)tb.millitm) * 1000LL; + } +#else + struct timeval tm; + gettimeofday(&tm, NULL); + + nTime = (long long)tm.tv_sec * 1000000LL + (long long)tm.tv_usec; +#endif + return nTime; +} + +/*---------------------------------------------------------------------------*/ + +static void compression_progress(long long nOriginalSize, long long nCompressedSize) { + if (nOriginalSize >= 1024 * 1024) { + fprintf(stdout, "\r%lld => %lld (%g %%) \b\b\b\b\b", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); + fflush(stdout); + } +} + +static int do_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, const int nFormatVersion) { + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + int nCommandCount = 0, nSafeDist = 0; + int nFlags; + lzsa_status_t nStatus; + lzsa_stats stats; + + nFlags = 0; + if (nOptions & OPT_FAVOR_RATIO) + nFlags |= LZSA_FLAG_FAVOR_RATIO; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist, &stats); + + if ((nOptions & OPT_VERBOSE)) { + nEndTime = do_get_time(); + } + + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break; + case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break; + case LZSA_ERROR_COMPRESSION: fprintf(stderr, "internal compression error\n"); break; + case LZSA_ERROR_RAW_TOOLARGE: fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n"); break; + case LZSA_ERROR_RAW_UNCOMPRESSED: fprintf(stderr, "error: incompressible data needs to be <= 64 Kb in raw blocks\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown compression error %d\n", nStatus); break; + } + + if (nStatus) + return 100; + + if ((nOptions & OPT_VERBOSE)) { + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n", + pszInFilename, fDelta, fSpeed, nCommandCount, (double)nOriginalSize / (double)nCommandCount, + nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); + if (nOptions & OPT_RAW) { + fprintf(stdout, "Safe distance: %d (0x%X)\n", nSafeDist, nSafeDist); + } + } + + if (nOptions & OPT_STATS) { + if (stats.literals_divisor > 0) + fprintf(stdout, "Literals: min: %d avg: %d max: %d count: %d\n", stats.min_literals, stats.total_literals / stats.literals_divisor, stats.max_literals, stats.literals_divisor); + else + fprintf(stdout, "Literals: none\n"); + if (stats.match_divisor > 0) { + fprintf(stdout, "Offsets: min: %d avg: %d max: %d reps: %d count: %d\n", stats.min_offset, stats.total_offsets / stats.match_divisor, stats.max_offset, stats.num_rep_offsets, stats.match_divisor); + fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor); + } + else { + fprintf(stdout, "Offsets: none\n"); + fprintf(stdout, "Match lens: none\n"); + } + if (stats.rle1_divisor > 0) { + fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor); + } + else { + fprintf(stdout, "RLE1 lens: none\n"); + } + if (stats.rle2_divisor > 0) { + fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor); + } + else { + fprintf(stdout, "RLE2 lens: none\n"); + } + } + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL, nCompressedSize = 0LL; + lzsa_status_t nStatus; + int nFlags; + + nFlags = 0; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + nStatus = lzsa_decompress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize); + + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error writing '%s'\n", pszOutFilename); break; + case LZSA_ERROR_DICTIONARY: fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break; + case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break; + case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break; + } + + if (nStatus) { + fprintf(stderr, "decompression error for '%s'\n", pszInFilename); + return 100; + } + else { + if (nOptions & OPT_VERBOSE) { + nEndTime = do_get_time(); + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "Decompressed '%s' in %g seconds, %g Mb/s\n", + pszInFilename, fDelta, fSpeed); + } + + return 0; + } +} + +/*---------------------------------------------------------------------------*/ + +typedef struct { + FILE *f; + void *pCompareDataBuf; + size_t nCompareDataSize; +} compare_stream_t; + +void comparestream_close(lzsa_stream_t *stream) { + if (stream->obj) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + if (pCompareStream->pCompareDataBuf) { + free(pCompareStream->pCompareDataBuf); + pCompareStream->pCompareDataBuf = NULL; + } + + fclose(pCompareStream->f); + free(pCompareStream); + + stream->obj = NULL; + stream->read = NULL; + stream->write = NULL; + stream->eof = NULL; + stream->close = NULL; + } +} + +size_t comparestream_read(lzsa_stream_t *stream, void *ptr, size_t size) { + return 0; +} + +size_t comparestream_write(lzsa_stream_t *stream, void *ptr, size_t size) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + + if (!pCompareStream->pCompareDataBuf || pCompareStream->nCompareDataSize < size) { + pCompareStream->nCompareDataSize = size; + pCompareStream->pCompareDataBuf = realloc(pCompareStream->pCompareDataBuf, pCompareStream->nCompareDataSize); + if (!pCompareStream->pCompareDataBuf) + return 0; + } + + size_t nReadBytes = fread(pCompareStream->pCompareDataBuf, 1, size, pCompareStream->f); + if (nReadBytes != size) { + return 0; + } + + if (memcmp(ptr, pCompareStream->pCompareDataBuf, size)) { + return 0; + } + + return size; +} + +int comparestream_eof(lzsa_stream_t *stream) { + compare_stream_t *pCompareStream = (compare_stream_t *)stream->obj; + return feof(pCompareStream->f); +} + +int comparestream_open(lzsa_stream_t *stream, const char *pszCompareFilename, const char *pszMode) { + compare_stream_t *pCompareStream; + + pCompareStream = (compare_stream_t*)malloc(sizeof(compare_stream_t)); + if (!pCompareStream) + return -1; + + pCompareStream->pCompareDataBuf = NULL; + pCompareStream->nCompareDataSize = 0; + pCompareStream->f = (FILE*)fopen(pszCompareFilename, pszMode); + + if (pCompareStream->f) { + stream->obj = pCompareStream; + stream->read = comparestream_read; + stream->write = comparestream_write; + stream->eof = comparestream_eof; + stream->close = comparestream_close; + return 0; + } + else { + free(pCompareStream); + return -1; + } +} + +static int do_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { + lzsa_stream_t inStream, compareStream; + long long nStartTime = 0LL, nEndTime = 0LL; + long long nOriginalSize = 0LL; + long long nCompressedSize = 0LL; + void *pDictionaryData = NULL; + int nDictionaryDataSize = 0; + lzsa_status_t nStatus; + int nFlags; + + if (lzsa_filestream_open(&inStream, pszInFilename, "rb") < 0) { + fprintf(stderr, "error opening compressed input file\n"); + return 100; + } + + if (comparestream_open(&compareStream, pszOutFilename, "rb") < 0) { + fprintf(stderr, "error opening original uncompressed file\n"); + inStream.close(&inStream); + return 100; + } + + nStatus = lzsa_dictionary_load(pszDictionaryFilename, &pDictionaryData, &nDictionaryDataSize); + if (nStatus) { + compareStream.close(&compareStream); + inStream.close(&inStream); + fprintf(stderr, "error reading dictionary '%s'\n", pszDictionaryFilename); + return 100; + } + + nFlags = 0; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + if (nOptions & OPT_VERBOSE) { + nStartTime = do_get_time(); + } + + nStatus = lzsa_decompress_stream(&inStream, &compareStream, pDictionaryData, nDictionaryDataSize, nFlags, nFormatVersion, &nOriginalSize, &nCompressedSize); + + switch (nStatus) { + case LZSA_ERROR_SRC: fprintf(stderr, "error reading '%s'\n", pszInFilename); break; + case LZSA_ERROR_DST: fprintf(stderr, "error comparing compressed file '%s' with original '%s'\n", pszInFilename, pszOutFilename); break; + case LZSA_ERROR_MEMORY: fprintf(stderr, "out of memory\n"); break; + case LZSA_ERROR_DECOMPRESSION: fprintf(stderr, "internal decompression error\n"); break; + case LZSA_ERROR_FORMAT: fprintf(stderr, "invalid magic number or format version in input file\n"); break; + case LZSA_OK: break; + default: fprintf(stderr, "unknown decompression error %d\n", nStatus); break; + } + + lzsa_dictionary_free(&pDictionaryData); + compareStream.close(&compareStream); + inStream.close(&inStream); + + if (nStatus) { + return 100; + } + else { + if (nOptions & OPT_VERBOSE) { + nEndTime = do_get_time(); + double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; + double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; + fprintf(stdout, "Compared '%s' in %g seconds, %g Mb/s\n", + pszInFilename, fDelta, fSpeed); + } + + return 0; + } +} + +/*---------------------------------------------------------------------------*/ + +static void generate_compressible_data(unsigned char *pBuffer, size_t nBufferSize, int nMinMatchSize, unsigned int nSeed, int nNumLiteralValues, float fMatchProbability) { + size_t nIndex = 0; + int nMatchProbability = (int)(fMatchProbability * 1023.0f); + + srand(nSeed); + + if (nIndex >= nBufferSize) return; + pBuffer[nIndex++] = rand() % nNumLiteralValues; + + while (nIndex < nBufferSize) { + if ((rand() & 1023) >= nMatchProbability) { + size_t nLiteralCount = rand() & 127; + if (nLiteralCount > (nBufferSize - nIndex)) + nLiteralCount = nBufferSize - nIndex; + + while (nLiteralCount--) + pBuffer[nIndex++] = rand() % nNumLiteralValues; + } + else { + size_t nMatchLength = nMinMatchSize + (rand() & 1023); + size_t nMatchOffset; + + if (nMatchLength > (nBufferSize - nIndex)) + nMatchLength = nBufferSize - nIndex; + if (nMatchLength > nIndex) + nMatchLength = nIndex; + + if (nMatchLength < nIndex) + nMatchOffset = rand() % (nIndex - nMatchLength); + else + nMatchOffset = 0; + + while (nMatchLength--) { + pBuffer[nIndex] = pBuffer[nIndex - nMatchOffset]; + nIndex++; + } + } + } +} + +static void xor_data(unsigned char *pBuffer, size_t nBufferSize, unsigned int nSeed, float fXorProbability) { + size_t nIndex = 0; + int nXorProbability = (int)(fXorProbability * 1023.0f); + + srand(nSeed); + + if (nIndex >= nBufferSize) return; + + while (nIndex < nBufferSize) { + if ((rand() & 1023) < nXorProbability) { + pBuffer[nIndex] ^= 0xff; + } + nIndex++; + } +} + +static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) { + unsigned char *pGeneratedData; + unsigned char *pCompressedData; + unsigned char *pTmpCompressedData; + unsigned char *pTmpDecompressedData; + size_t nGeneratedDataSize; + size_t nMaxCompressedDataSize; + unsigned int nSeed = 123; + int nFlags; + int i; + + nFlags = 0; + if (nOptions & OPT_FAVOR_RATIO) + nFlags |= LZSA_FLAG_FAVOR_RATIO; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + pGeneratedData = (unsigned char*)malloc(4 * BLOCK_SIZE); + if (!pGeneratedData) { + fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE); + return 100; + } + + nMaxCompressedDataSize = lzsa_get_max_compressed_size_inmem(4 * BLOCK_SIZE); + pCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pCompressedData) { + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpCompressedData = (unsigned char*)malloc(nMaxCompressedDataSize); + if (!pTmpCompressedData) { + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %zd bytes needed\n", nMaxCompressedDataSize); + return 100; + } + + pTmpDecompressedData = (unsigned char*)malloc(4 * BLOCK_SIZE); + if (!pTmpDecompressedData) { + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "out of memory, %d bytes needed\n", 4 * BLOCK_SIZE); + return 100; + } + + memset(pGeneratedData, 0, 4 * BLOCK_SIZE); + memset(pCompressedData, 0, nMaxCompressedDataSize); + memset(pTmpCompressedData, 0, nMaxCompressedDataSize); + + /* Test compressing with a too small buffer to do anything, expect to fail cleanly */ + for (i = 0; i < 12; i++) { + generate_compressible_data(pGeneratedData, i, nMinMatchSize, nSeed, 256, 0.5f); + lzsa_compress_inmem(pGeneratedData, pCompressedData, i, i, nFlags, nMinMatchSize, nFormatVersion); + } + + size_t nDataSizeStep = 128; + float fProbabilitySizeStep = 0.0005f; + + for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((size_t)((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE))); nGeneratedDataSize += nDataSizeStep) { + float fMatchProbability; + + fprintf(stdout, "size %zd", nGeneratedDataSize); + for (fMatchProbability = 0; fMatchProbability <= 0.995f; fMatchProbability += fProbabilitySizeStep) { + int nNumLiteralValues[12] = { 1, 2, 3, 15, 30, 56, 96, 137, 178, 191, 255, 256 }; + float fXorProbability; + + fputc('.', stdout); + fflush(stdout); + + for (i = 0; i < 12; i++) { + /* Generate data to compress */ + generate_compressible_data(pGeneratedData, nGeneratedDataSize, nMinMatchSize, nSeed, nNumLiteralValues[i], fMatchProbability); + + /* Try to compress it, expected to succeed */ + size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize), + nFlags, nMinMatchSize, nFormatVersion); + if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error compressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress it, expected to succeed */ + size_t nActualDecompressedSize; + int nDecFormatVersion = nFormatVersion; + nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion); + if (nActualDecompressedSize == -1) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error decompressing size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + if (memcmp(pGeneratedData, pTmpDecompressedData, nGeneratedDataSize)) { + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + free(pTmpCompressedData); + pTmpCompressedData = NULL; + free(pCompressedData); + pCompressedData = NULL; + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stderr, "\nself-test: error comparing decompressed and original data, size %zd, seed %d, match probability %f, literals range %d\n", nGeneratedDataSize, nSeed, fMatchProbability, nNumLiteralValues[i]); + return 100; + } + + /* Try to decompress corrupted data, expected to fail cleanly, without crashing or corrupting memory outside the output buffer */ + for (fXorProbability = 0.05f; fXorProbability <= 0.5f; fXorProbability += 0.05f) { + memcpy(pTmpCompressedData, pCompressedData, nActualCompressedSize); + xor_data(pTmpCompressedData + lzsa_get_header_size() + lzsa_get_frame_size(), nActualCompressedSize - lzsa_get_header_size() - lzsa_get_frame_size() - lzsa_get_frame_size() /* footer */, nSeed, fXorProbability); + nDecFormatVersion = nFormatVersion; + lzsa_decompress_inmem(pTmpCompressedData, pGeneratedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion); + } + } + + nSeed++; + } + + fputc(10, stdout); + fflush(stdout); + + nDataSizeStep <<= 1; + if (nDataSizeStep > (128 * 4096)) + nDataSizeStep = 128 * 4096; + fProbabilitySizeStep *= 1.25; + if (fProbabilitySizeStep > (0.0005f * 4096)) + fProbabilitySizeStep = 0.0005f * 4096; + } + + free(pTmpDecompressedData); + pTmpDecompressedData = NULL; + + free(pTmpCompressedData); + pTmpCompressedData = NULL; + + free(pCompressedData); + pCompressedData = NULL; + + free(pGeneratedData); + pGeneratedData = NULL; + + fprintf(stdout, "All tests passed.\n"); + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize, int nFormatVersion) { + size_t nFileSize, nMaxCompressedSize; + unsigned char *pFileData; + unsigned char *pCompressedData; + int nFlags; + int i; + + nFlags = 0; + if (nOptions & OPT_FAVOR_RATIO) + nFlags |= LZSA_FLAG_FAVOR_RATIO; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + if (pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole original file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + /* Allocate max compressed size */ + + nMaxCompressedSize = lzsa_get_max_compressed_size_inmem(nFileSize); + + pCompressedData = (unsigned char*)malloc(nMaxCompressedSize + 2048); + if (!pCompressedData) { + free(pFileData); + fprintf(stderr, "out of memory for compressing '%s', %zd bytes needed\n", pszInFilename, nMaxCompressedSize); + return 100; + } + + memset(pCompressedData + 1024, 0, nMaxCompressedSize); + + long long nBestCompTime = -1; + + size_t nActualCompressedSize = 0; + size_t nRightGuardPos = nMaxCompressedSize; + + for (i = 0; i < 5; i++) { + unsigned char nGuard = 0x33 + i; + int j; + + /* Write guard bytes around the output buffer, to help check for writes outside of it by the compressor */ + memset(pCompressedData, nGuard, 1024); + memset(pCompressedData + 1024 + nRightGuardPos, nGuard, 1024); + + long long t0 = do_get_time(); + nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion); + long long t1 = do_get_time(); + if (nActualCompressedSize == -1) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "compression error\n"); + return 100; + } + + long long nCurDecTime = t1 - t0; + if (nBestCompTime == -1 || nBestCompTime > nCurDecTime) + nBestCompTime = nCurDecTime; + + /* Check guard bytes before the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j - 1024); + return 100; + } + } + + /* Check guard bytes after the output buffer */ + for (j = 0; j < 1024; j++) { + if (pCompressedData[1024 + nRightGuardPos + j] != nGuard) { + free(pCompressedData); + free(pFileData); + fprintf(stderr, "error, wrote outside of output buffer at %d!\n", j); + return 100; + } + } + + nRightGuardPos = nActualCompressedSize; + } + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole compressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pCompressedData + 1024, 1, nActualCompressedSize, f_out); + fclose(f_out); + } + } + + free(pCompressedData); + free(pFileData); + + fprintf(stdout, "compressed size: %zd bytes\n", nActualCompressedSize); + fprintf(stdout, "compression time: %lld microseconds (%g Mb/s)\n", nBestCompTime, ((double)nActualCompressedSize / 1024.0) / ((double)nBestCompTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, int nFormatVersion) { + size_t nFileSize, nMaxDecompressedSize; + unsigned char *pFileData; + unsigned char *pDecompressedData; + int nFlags; + int i; + + nFlags = 0; + if (nOptions & OPT_RAW) + nFlags |= LZSA_FLAG_RAW_BLOCK; + if (nOptions & OPT_RAW_BACKWARD) + nFlags |= LZSA_FLAG_RAW_BACKWARD; + + if (pszDictionaryFilename) { + fprintf(stderr, "in-memory benchmarking does not support dictionaries\n"); + return 100; + } + + /* Read the whole compressed file in memory */ + + FILE *f_in = fopen(pszInFilename, "rb"); + if (!f_in) { + fprintf(stderr, "error opening '%s' for reading\n", pszInFilename); + return 100; + } + + fseek(f_in, 0, SEEK_END); + nFileSize = (size_t)ftell(f_in); + fseek(f_in, 0, SEEK_SET); + + pFileData = (unsigned char*)malloc(nFileSize); + if (!pFileData) { + fclose(f_in); + fprintf(stderr, "out of memory for reading '%s', %zd bytes needed\n", pszInFilename, nFileSize); + return 100; + } + + if (fread(pFileData, 1, nFileSize, f_in) != nFileSize) { + free(pFileData); + fclose(f_in); + fprintf(stderr, "I/O error while reading '%s'\n", pszInFilename); + return 100; + } + + fclose(f_in); + + /* Allocate max decompressed size */ + + if (nOptions & OPT_RAW) + nMaxDecompressedSize = 65536; + else + nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize); + if (nMaxDecompressedSize == -1) { + free(pFileData); + fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename); + return 100; + } + + pDecompressedData = (unsigned char*)malloc(nMaxDecompressedSize); + if (!pDecompressedData) { + free(pFileData); + fprintf(stderr, "out of memory for decompressing '%s', %zd bytes needed\n", pszInFilename, nMaxDecompressedSize); + return 100; + } + + memset(pDecompressedData, 0, nMaxDecompressedSize); + + long long nBestDecTime = -1; + + size_t nActualDecompressedSize = 0; + for (i = 0; i < 50; i++) { + long long t0 = do_get_time(); + nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion); + long long t1 = do_get_time(); + if (nActualDecompressedSize == -1) { + free(pDecompressedData); + free(pFileData); + fprintf(stderr, "decompression error\n"); + return 100; + } + + long long nCurDecTime = t1 - t0; + if (nBestDecTime == -1 || nBestDecTime > nCurDecTime) + nBestDecTime = nCurDecTime; + } + + if (pszOutFilename) { + FILE *f_out; + + /* Write whole decompressed file out */ + + f_out = fopen(pszOutFilename, "wb"); + if (f_out) { + fwrite(pDecompressedData, 1, nActualDecompressedSize, f_out); + fclose(f_out); + } + } + + free(pDecompressedData); + free(pFileData); + + fprintf(stdout, "format: LZSA%d\n", nFormatVersion); + fprintf(stdout, "decompressed size: %zd bytes\n", nActualDecompressedSize); + fprintf(stdout, "decompression time: %lld microseconds (%g Mb/s)\n", nBestDecTime, ((double)nActualDecompressedSize / 1024.0) / ((double)nBestDecTime / 1000.0)); + + return 0; +} + +/*---------------------------------------------------------------------------*/ + +int main(int argc, char **argv) { + int i; + const char *pszInFilename = NULL; + const char *pszOutFilename = NULL; + const char *pszDictionaryFilename = NULL; + int nArgsError = 0; + int nCommandDefined = 0; + int nVerifyCompression = 0; + int nMinMatchDefined = 0; + int nFormatVersionDefined = 0; + char cCommand = 'z'; + int nMinMatchSize = 0; + unsigned int nOptions = OPT_FAVOR_RATIO; + int nFormatVersion = 1; + + for (i = 1; i < argc; i++) { + if (!strcmp(argv[i], "-d")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'd'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-z")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'z'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-c")) { + if (!nVerifyCompression) { + nVerifyCompression = 1; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-cbench")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'B'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-dbench")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 'b'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-test")) { + if (!nCommandDefined) { + nCommandDefined = 1; + cCommand = 't'; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-D")) { + if (!pszDictionaryFilename && (i + 1) < argc) { + pszDictionaryFilename = argv[i + 1]; + i++; + } + else + nArgsError = 1; + } + else if (!strncmp(argv[i], "-D", 2)) { + if (!pszDictionaryFilename) { + pszDictionaryFilename = argv[i] + 2; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-m")) { + if (!nMinMatchDefined && (i + 1) < argc) { + char *pEnd = NULL; + nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10); + if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) { + i++; + nMinMatchDefined = 1; + nOptions &= (~OPT_FAVOR_RATIO); + } + else { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strncmp(argv[i], "-m", 2)) { + if (!nMinMatchDefined) { + char *pEnd = NULL; + nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10); + if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= 2 && nMinMatchSize <= 5)) { + nMinMatchDefined = 1; + nOptions &= (~OPT_FAVOR_RATIO); + } + else { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "--prefer-ratio")) { + if (!nMinMatchDefined) { + nMinMatchSize = 0; + nMinMatchDefined = 1; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "--prefer-speed")) { + if (!nMinMatchDefined) { + nMinMatchSize = 3; + nOptions &= (~OPT_FAVOR_RATIO); + nMinMatchDefined = 1; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-f")) { + if (!nFormatVersionDefined && (i + 1) < argc) { + char *pEnd = NULL; + nFormatVersion = (int)strtol(argv[i + 1], &pEnd, 10); + if (pEnd && pEnd != argv[i + 1] && (nFormatVersion >= 1 && nFormatVersion <= 2)) { + i++; + nFormatVersionDefined = 1; + } + else { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strncmp(argv[i], "-f", 2)) { + if (!nFormatVersionDefined) { + char *pEnd = NULL; + nFormatVersion = (int)strtol(argv[i] + 2, &pEnd, 10); + if (pEnd && pEnd != (argv[i] + 2) && (nFormatVersion >= 1 && nFormatVersion <= 2)) { + nFormatVersionDefined = 1; + } + else { + nArgsError = 1; + } + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-v")) { + if ((nOptions & OPT_VERBOSE) == 0) { + nOptions |= OPT_VERBOSE; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-r")) { + if ((nOptions & OPT_RAW) == 0) { + nOptions |= OPT_RAW; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-b")) { + if ((nOptions & OPT_RAW_BACKWARD) == 0) { + nOptions |= OPT_RAW_BACKWARD; + } + else + nArgsError = 1; + } + else if (!strcmp(argv[i], "-stats")) { + if ((nOptions & OPT_STATS) == 0) { + nOptions |= OPT_STATS; + } + else + nArgsError = 1; + } + else { + if (!pszInFilename) + pszInFilename = argv[i]; + else { + if (!pszOutFilename) + pszOutFilename = argv[i]; + else + nArgsError = 1; + } + } + } + + if (!nArgsError && (nOptions & OPT_RAW_BACKWARD) && !(nOptions & OPT_RAW)) { + fprintf(stderr, "error: -b (compress backwards) requires -r (raw block format)\n"); + return 100; + } + + if (!nArgsError && cCommand == 't') { + return do_self_test(nOptions, nMinMatchSize, nFormatVersion); + } + + if (nArgsError || !pszInFilename || !pszOutFilename) { + fprintf(stderr, "lzsa command-line tool v" TOOL_VERSION " by Emmanuel Marty and spke\n"); + fprintf(stderr, "usage: %s [-c] [-d] [-v] [-r] \n", argv[0]); + fprintf(stderr, " -c: check resulting stream after compressing\n"); + fprintf(stderr, " -d: decompress (default: compress)\n"); + fprintf(stderr, " -cbench: benchmark in-memory compression\n"); + fprintf(stderr, " -dbench: benchmark in-memory decompression\n"); + fprintf(stderr, " -test: run automated self-tests\n"); + fprintf(stderr, " -stats: show compressed data stats\n"); + fprintf(stderr, " -v: be verbose\n"); + fprintf(stderr, " -f : LZSA compression format (1-2)\n"); + fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n"); + fprintf(stderr, " -b: compress backward (requires -r and a backward decompressor)\n"); + fprintf(stderr, " -D : use dictionary file\n"); + fprintf(stderr, " -m : minimum match size (3-5) (default: 3)\n"); + fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n"); + fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n"); + return 100; + } + + do_init_time(); + + if (cCommand == 'z') { + int nResult = do_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion); + if (nResult == 0 && nVerifyCompression) { + return do_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions, nFormatVersion); + } else { + return nResult; + } + } + else if (cCommand == 'd') { + return do_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); + } + else if (cCommand == 'B') { + return do_compr_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize, nFormatVersion); + } + else if (cCommand == 'b') { + return do_dec_benchmark(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nFormatVersion); + } + else { + return 100; + } +}