From bab5225e4cc65d78025a4a299e8630793501e28f Mon Sep 17 00:00:00 2001
From: emmanuel-marty <brothers@puzzlebrothers.com>
Date: Thu, 2 May 2019 18:38:57 +0200
Subject: [PATCH] Implement dictionary support

---
 src/main.c | 133 +++++++++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 125 insertions(+), 8 deletions(-)

diff --git a/src/main.c b/src/main.c
index 74226e6..0bbbdd1 100755
--- a/src/main.c
+++ b/src/main.c
@@ -59,7 +59,7 @@ static long long lzsa_get_time() {
 
 /*---------------------------------------------------------------------------*/
 
-static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions, const int nMinMatchSize) {
+static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) {
    FILE *f_in, *f_out;
    unsigned char *pInData, *pOutData;
    lsza_compressor compressor;
@@ -110,6 +110,35 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
    }
    memset(pOutData, 0, BLOCK_SIZE);
 
+   int nDictionaryDataSize = 0;
+
+   if (pszDictionaryFilename) {
+      FILE *f_dictionary = fopen(pszDictionaryFilename, "rb");
+      if (!f_dictionary) {
+         free(pOutData);
+         pOutData = NULL;
+
+         free(pInData);
+         pInData = NULL;
+
+         fclose(f_out);
+         f_out = NULL;
+
+         fclose(f_in);
+         f_in = NULL;
+
+         fprintf(stderr, "error opening dictionary '%s' for reading\n", pszInFilename);
+         return 100;
+      }
+
+      nDictionaryDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, f_dictionary);
+      if (nDictionaryDataSize < 0)
+         nDictionaryDataSize = 0;
+
+      fclose(f_dictionary);
+      f_dictionary = NULL;
+   }
+
    nFlags = 0;
    if (nOptions & OPT_FAVOR_RATIO)
       nFlags |= LZSA_FLAG_FAVOR_RATIO;
@@ -150,20 +179,24 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
 
    int nPreviousBlockSize = 0;
 
+   if (nDictionaryDataSize)
+      nPreviousBlockSize = nDictionaryDataSize;
+
    while (!feof(f_in) && !bError) {
       int nInDataSize;
 
       if (nPreviousBlockSize) {
-         memcpy(pInData, pInData + BLOCK_SIZE, nPreviousBlockSize);
+         memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
       }
 
       nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in);
       if (nInDataSize > 0) {
-         if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0) {
+         if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0 && !nDictionaryDataSize) {
             fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n");
             bError = true;
             break;
          }
+         nDictionaryDataSize = 0;
 
          int nOutDataSize;
 
@@ -285,7 +318,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
 
 /*---------------------------------------------------------------------------*/
 
-static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
+static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
    long long nStartTime = 0LL, nEndTime = 0LL;
    long long nOriginalSize = 0LL;
    unsigned int nFileSize = 0;
@@ -366,6 +399,34 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
       return 100;
    }
 
+   int nDictionaryDataSize = 0;
+   if (pszDictionaryFilename) {
+      FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
+      if (!pDictionaryFile) {
+         free(pOutData);
+         pOutData = NULL;
+
+         free(pInBlock);
+         pInBlock = NULL;
+
+         fclose(pOutFile);
+         pOutFile = NULL;
+
+         fclose(pInFile);
+         pInFile = NULL;
+
+         fprintf(stderr, "error opening dictionary file\n");
+         return 100;
+      }
+
+      nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile);
+      if (nDictionaryDataSize < 0)
+         nDictionaryDataSize = 0;
+
+      fclose(pDictionaryFile);
+      pDictionaryFile = NULL;
+   }
+
    if (nOptions & OPT_VERBOSE) {
       nStartTime = lzsa_get_time();
    }
@@ -373,6 +434,10 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
    int nDecompressionError = 0;
    int nPrevDecompressedSize = 0;
 
+   if (nDictionaryDataSize) {
+      nPrevDecompressedSize = nDictionaryDataSize;
+   }
+
    while (!feof(pInFile) && !nDecompressionError) {
       unsigned int nBlockSize = 0;
 
@@ -469,7 +534,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
    }
 }
 
-static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
+static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
    long long nStartTime = 0LL, nEndTime = 0LL;
    long long nOriginalSize = 0LL;
    long long nKnownGoodSize = 0LL;
@@ -569,6 +634,37 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
       return 100;
    }
 
+   int nDictionaryDataSize = 0;
+   if (pszDictionaryFilename) {
+      FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
+      if (!pDictionaryFile) {
+         free(pCompareData);
+         pCompareData = NULL;
+
+         free(pOutData);
+         pOutData = NULL;
+
+         free(pInBlock);
+         pInBlock = NULL;
+
+         fclose(pOutFile);
+         pOutFile = NULL;
+
+         fclose(pInFile);
+         pInFile = NULL;
+
+         fprintf(stderr, "error opening dictionary file\n");
+         return 100;
+      }
+
+      nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile);
+      if (nDictionaryDataSize < 0)
+         nDictionaryDataSize = 0;
+
+      fclose(pDictionaryFile);
+      pDictionaryFile = NULL;
+   }
+
    if (nOptions & OPT_VERBOSE) {
       nStartTime = lzsa_get_time();
    }
@@ -577,6 +673,10 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
    bool bComparisonError = false;
    int nPrevDecompressedSize = 0;
 
+   if (nDictionaryDataSize) {
+      nPrevDecompressedSize = nDictionaryDataSize;
+   }
+
    while (!feof(pInFile) && !nDecompressionError && !bComparisonError) {
       unsigned int nBlockSize = 0;
 
@@ -695,6 +795,7 @@ int main(int argc, char **argv) {
    int i;
    const char *pszInFilename = NULL;
    const char *pszOutFilename = NULL;
+   const char *pszDictionaryFilename = NULL;
    bool bArgsError = false;
    bool bCommandDefined = false;
    bool bVerifyCompression = false;
@@ -727,6 +828,21 @@ int main(int argc, char **argv) {
          else
             bArgsError = true;
       }
+      else if (!strcmp(argv[i], "-D")) {
+         if (!pszDictionaryFilename && (i + 1) < argc) {
+            pszDictionaryFilename = argv[i + 1];
+            i++;
+         }
+         else
+            bArgsError = true;
+      }
+      else if (!strncmp(argv[i], "-D", 2)) {
+         if (!pszDictionaryFilename) {
+            pszDictionaryFilename = argv[i] + 2;
+         }
+         else
+            bArgsError = true;
+      }
       else if (!strcmp(argv[i], "-m")) {
          if (!bMinMatchDefined && (i + 1) < argc) {
             char *pEnd = NULL;
@@ -807,6 +923,7 @@ int main(int argc, char **argv) {
       fprintf(stderr, "       -d: decompress (default: compress)\n");
       fprintf(stderr, "       -v: be verbose\n");
       fprintf(stderr, "       -r: raw block format (max. 64 Kb files)\n");
+      fprintf(stderr, "       -D <filename>: use dictionary file\n");
       fprintf(stderr, "       -m <value>: minimum match size (3-14) (default: 3)\n");
       fprintf(stderr, "       --prefer-ratio: favor compression ratio (default)\n");
       fprintf(stderr, "       --prefer-speed: favor decompression speed (same as -m3)\n");
@@ -814,13 +931,13 @@ int main(int argc, char **argv) {
    }
 
    if (cCommand == 'z') {
-      int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions, nMinMatchSize);
+      int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize);
       if (nResult == 0 && bVerifyCompression) {
-         nResult = lzsa_compare(pszOutFilename, pszInFilename, nOptions);
+         nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions);
       }
    }
    else if (cCommand == 'd') {
-      return lzsa_decompress(pszInFilename, pszOutFilename, nOptions);
+      return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
    }
    else {
       return 100;