From bab5225e4cc65d78025a4a299e8630793501e28f Mon Sep 17 00:00:00 2001 From: emmanuel-marty Date: Thu, 2 May 2019 18:38:57 +0200 Subject: [PATCH] Implement dictionary support --- src/main.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 125 insertions(+), 8 deletions(-) diff --git a/src/main.c b/src/main.c index 74226e6..0bbbdd1 100755 --- a/src/main.c +++ b/src/main.c @@ -59,7 +59,7 @@ static long long lzsa_get_time() { /*---------------------------------------------------------------------------*/ -static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions, const int nMinMatchSize) { +static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) { FILE *f_in, *f_out; unsigned char *pInData, *pOutData; lsza_compressor compressor; @@ -110,6 +110,35 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, } memset(pOutData, 0, BLOCK_SIZE); + int nDictionaryDataSize = 0; /* number of dictionary bytes loaded; stays 0 when no dictionary is given */ + + if (pszDictionaryFilename) { /* the dictionary is optional: a NULL filename skips this whole step */ + FILE *f_dictionary = fopen(pszDictionaryFilename, "rb"); + if (!f_dictionary) { /* release both buffers and close both files before returning the error */ + free(pOutData); + pOutData = NULL; + + free(pInData); + pInData = NULL; + + fclose(f_out); + f_out = NULL; + + fclose(f_in); + f_in = NULL; + + fprintf(stderr, "error opening dictionary '%s' for reading\n", pszDictionaryFilename); /* name the dictionary, which is the file that failed to open */ + return 100; + } + + nDictionaryDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, f_dictionary); /* at most BLOCK_SIZE-1 bytes are read, stored at the BLOCK_SIZE offset of pInData */ + if (nDictionaryDataSize < 0) /* NOTE(review): fread() returns a size_t count, so this guard looks purely defensive */ + nDictionaryDataSize = 0; + + fclose(f_dictionary); + f_dictionary = NULL; + } + nFlags = 0; if (nOptions & OPT_FAVOR_RATIO) nFlags |= LZSA_FLAG_FAVOR_RATIO; @@ -150,20 +179,24 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, int nPreviousBlockSize = 0; + if (nDictionaryDataSize) + nPreviousBlockSize = nDictionaryDataSize; /* the loaded dictionary stands in as the previous block ahead of the first input block */ + while (!feof(f_in) && !bError) { int nInDataSize; if 
(nPreviousBlockSize) { - memcpy(pInData, pInData + BLOCK_SIZE, nPreviousBlockSize); + memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize); /* move the previous data so that it ends exactly at the BLOCK_SIZE offset, where the next block is read */ } nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in); if (nInDataSize > 0) { - if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0) { + if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0 && !nDictionaryDataSize) { /* previous data that is only the dictionary does not trigger the raw-format size error */ fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n"); bError = true; break; } + nDictionaryDataSize = 0; /* cleared once a block has been read, so the raw-format check above applies to any later block */ int nOutDataSize; @@ -285,7 +318,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, /*---------------------------------------------------------------------------*/ -static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) { +static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL; unsigned int nFileSize = 0; @@ -366,6 +399,34 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename return 100; } + int nDictionaryDataSize = 0; /* number of dictionary bytes loaded; stays 0 when no dictionary is given */ + if (pszDictionaryFilename) { /* the dictionary is optional: a NULL filename skips this whole step */ + FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); + if (!pDictionaryFile) { /* release both buffers and close both files before returning the error */ + free(pOutData); + pOutData = NULL; + + free(pInBlock); + pInBlock = NULL; + + fclose(pOutFile); + pOutFile = NULL; + + fclose(pInFile); + pInFile = NULL; + + fprintf(stderr, "error opening dictionary file\n"); + return 100; + } + + nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile); /* at most BLOCK_SIZE-1 bytes are read, stored at the BLOCK_SIZE offset of pOutData */ + if (nDictionaryDataSize < 0) /* NOTE(review): fread() returns a size_t count, so this guard looks purely defensive */ + nDictionaryDataSize = 0; + + fclose(pDictionaryFile); + pDictionaryFile = NULL; + } + if (nOptions & OPT_VERBOSE) { nStartTime = lzsa_get_time(); } @@ -373,6 +434,10 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename int 
nDecompressionError = 0; int nPrevDecompressedSize = 0; + if (nDictionaryDataSize) { /* a loaded dictionary is counted as previously decompressed data before the first block is processed */ + nPrevDecompressedSize = nDictionaryDataSize; + } + while (!feof(pInFile) && !nDecompressionError) { unsigned int nBlockSize = 0; @@ -469,7 +534,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename } } -static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) { +static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) { long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL; long long nKnownGoodSize = 0LL; @@ -569,6 +634,37 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c return 100; } + int nDictionaryDataSize = 0; /* number of dictionary bytes loaded; stays 0 when no dictionary is given */ + if (pszDictionaryFilename) { /* the dictionary is optional: a NULL filename skips this whole step */ + FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb"); + if (!pDictionaryFile) { /* release all three buffers and close both files before returning the error */ + free(pCompareData); + pCompareData = NULL; + + free(pOutData); + pOutData = NULL; + + free(pInBlock); + pInBlock = NULL; + + fclose(pOutFile); + pOutFile = NULL; + + fclose(pInFile); + pInFile = NULL; + + fprintf(stderr, "error opening dictionary file\n"); + return 100; + } + + nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile); /* at most BLOCK_SIZE-1 bytes are read, stored at the BLOCK_SIZE offset of pOutData */ + if (nDictionaryDataSize < 0) /* NOTE(review): fread() returns a size_t count, so this guard looks purely defensive */ + nDictionaryDataSize = 0; + + fclose(pDictionaryFile); + pDictionaryFile = NULL; + } + if (nOptions & OPT_VERBOSE) { nStartTime = lzsa_get_time(); } @@ -577,6 +673,10 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c bool bComparisonError = false; int nPrevDecompressedSize = 0; + if (nDictionaryDataSize) { /* a loaded dictionary is counted as previously decompressed data before the first block is processed */ + nPrevDecompressedSize = nDictionaryDataSize; + } + while (!feof(pInFile) && !nDecompressionError && !bComparisonError) { unsigned int nBlockSize = 0; @@ -695,6 +795,7 @@ int main(int argc, char **argv) { int i; const char *pszInFilename = NULL; const char *pszOutFilename = NULL; + const char 
*pszDictionaryFilename = NULL; /* NULL means no dictionary was requested on the command line */ bool bArgsError = false; bool bCommandDefined = false; bool bVerifyCompression = false; @@ -727,6 +828,21 @@ int main(int argc, char **argv) { else bArgsError = true; } + else if (!strcmp(argv[i], "-D")) { /* separate form: the dictionary path is the next argument */ + if (!pszDictionaryFilename && (i + 1) < argc) { + pszDictionaryFilename = argv[i + 1]; + i++; + } + else + bArgsError = true; /* option given twice, or no argument follows it */ + } + else if (!strncmp(argv[i], "-D", 2)) { /* attached form: the path follows the option letter in the same argument */ + if (!pszDictionaryFilename) { + pszDictionaryFilename = argv[i] + 2; + } + else + bArgsError = true; /* option given twice */ + } else if (!strcmp(argv[i], "-m")) { if (!bMinMatchDefined && (i + 1) < argc) { char *pEnd = NULL; @@ -807,6 +923,7 @@ int main(int argc, char **argv) { fprintf(stderr, " -d: decompress (default: compress)\n"); fprintf(stderr, " -v: be verbose\n"); fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n"); + fprintf(stderr, " -D <filename>: use dictionary file\n"); /* the placeholder shows that the option takes a file argument */ fprintf(stderr, " -m : minimum match size (3-14) (default: 3)\n"); fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n"); fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n"); @@ -814,13 +931,13 @@ int main(int argc, char **argv) { } if (cCommand == 'z') { - int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions, nMinMatchSize); + int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize); if (nResult == 0 && bVerifyCompression) { - nResult = lzsa_compare(pszOutFilename, pszInFilename, nOptions); + nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions); /* verification uses the same dictionary as compression */ } } else if (cCommand == 'd') { - return lzsa_decompress(pszInFilename, pszOutFilename, nOptions); + return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions); } else { return 100;