Mirror of https://github.com/emmanuel-marty/lzsa.git, synced 2024-11-29 20:49:25 +00:00
Simplify LZSA1 token reducer (same binary output)
parent 1a4f662360
commit 1869d85c1f
@@ -388,66 +388,48 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const in
       lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
 
       if (pMatch->length >= MIN_MATCH_SIZE_V1) {
-         int nMatchLen = pMatch->length;
-         int nReduce = 0;
-
-         if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
-            int nMatchOffset = pMatch->offset;
-            int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
-            int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
-
-            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
-               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
-                  /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
-                   * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
-                   * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
-                   * match command by literals, the output size will not increase and it will remove one command. */
-                  nReduce = 1;
-               }
-            }
-            else {
-               int nCurIndex = i + nMatchLen;
-               int nNextNumLiterals = 0;
-
-               do {
-                  nCurIndex++;
-                  nNextNumLiterals++;
-               } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
-
-               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
-                  /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
-                   * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
-                  nReduce = 1;
-               }
-            }
-         }
-
-         if (nReduce) {
-            int j;
-
-            for (j = 0; j < nMatchLen; j++) {
-               pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
-            }
-            nNumLiterals += nMatchLen;
-            i += nMatchLen;
-
-            nDidReduce = 1;
-         }
-         else {
-            if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
-               pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
-               (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
-               /* Join */
-
-               pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
-               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
-               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
-               continue;
-            }
-
-            nNumLiterals = 0;
-            i += nMatchLen;
-         }
+         if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
+            (i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
+            int nNextIndex = i + pMatch->length;
+            int nNextLiterals = 0;
+
+            while (nNextIndex < nEndOffset && pCompressor->match[nNextIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1) {
+               nNextLiterals++;
+               nNextIndex++;
+            }
+
+            /* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
+            if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
+               8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
+               (8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
+               /* Reduce */
+               int nMatchLen = pMatch->length;
+               int j;
+
+               for (j = 0; j < nMatchLen; j++) {
+                  pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
+               }
+
+               nDidReduce = 1;
+               continue;
+            }
+         }
+
+         if ((i + pMatch->length) < nEndOffset && pMatch->length >= LCP_MAX &&
+            pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + pMatch->length) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (pMatch->length % pMatch->offset) == 0 &&
+            (pMatch->length + pCompressor->match[(i + pMatch->length) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
+            int nMatchLen = pMatch->length;
+
+            /* Join */
+
+            pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
+            pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
+            pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
+            continue;
+         }
+
+         i += pMatch->length;
+         nNumLiterals = 0;
       }
       else {
          nNumLiterals++;
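For readers following the cost arithmetic in the new "Reduce" branch above: the test keeps a short match only if encoding it as a match (its token, literal-run size, offset and match-length fields, plus the following token's literal-run size) is strictly cheaper than folding its bytes into the next command's literal run. The standalone sketch below restates that comparison outside the optimizer loop. It is illustrative only and not part of the commit; literals_varlen_bits() and match_varlen_bits() are simplified stand-ins for lzsa_get_literals_varlen_size_v1() and lzsa_get_match_varlen_size_v1(), whose exact thresholds may differ.

/* Illustrative sketch only -- restates the "Reduce" cost test in isolation.
 * The two varlen helpers are simplified stand-ins (assumptions), not the
 * real lzsa_get_literals_varlen_size_v1()/lzsa_get_match_varlen_size_v1(). */
#include <stdio.h>

#define MIN_MATCH_SIZE_V1 3

/* Assumption: extra bits needed to encode a literal run length beyond what
 * fits in the token. The real encoder's thresholds may differ. */
static int literals_varlen_bits(int nLength) {
   if (nLength < 7) return 0;
   if (nLength < 256) return 8;
   return 16;
}

/* Assumption: extra bits for an encoded match length beyond the token. */
static int match_varlen_bits(int nEncodedLength) {
   if (nEncodedLength < 15) return 0;
   if (nEncodedLength < 256) return 8;
   return 16;
}

/* Returns 1 if a short match is no cheaper than emitting its bytes as
 * literals folded into the next command, mirroring the reduce condition. */
static int should_reduce(int nNumLiterals, int nMatchLen, int nMatchOffset, int nNextLiterals) {
   int nCostAsMatch =
      8 /* this token */ + literals_varlen_bits(nNumLiterals) +
      ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ +
      match_varlen_bits(nMatchLen - MIN_MATCH_SIZE_V1) +
      8 /* next token */ + literals_varlen_bits(nNextLiterals);

   int nCostAsLiterals =
      8 /* next token absorbs everything */ + (nMatchLen << 3) +
      literals_varlen_bits(nNumLiterals + nMatchLen + nNextLiterals);

   return nCostAsMatch >= nCostAsLiterals;
}

int main(void) {
   /* A 3-byte match with a far (>256) offset costs token + 16-bit offset +
    * next token, so turning it into literals is at worst a wash. */
   printf("reduce 3-byte far match: %d\n", should_reduce(0, 3, 5000, 0));
   /* A longer match keeps winning over literals. */
   printf("reduce 9-byte near match: %d\n", should_reduce(0, 9, 40, 0));
   return 0;
}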
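The "Join" branch in the added code merges two back-to-back matches that copy from the same short offset. A minimal sketch of that predicate follows, again illustrative only: can_join_matches() is a hypothetical helper, not part of the library, and it omits the in-loop checks that nMatchLen >= LCP_MAX and that the joined pair does not end the block.

/* Illustrative sketch only -- restates the "Join" test from the diff.
 * Two adjacent matches can be fused into one when they copy from the same
 * nearby offset (a repeating period of at most 32 bytes), the first match
 * length is a whole multiple of that offset, and the fused length still
 * fits in the encoder's varlen limit (MAX_VARLEN in the real code). */
#include <stdbool.h>

static bool can_join_matches(int nMatchLen, int nMatchOffset,
                             int nNextMatchLen, int nNextMatchOffset,
                             int nMaxVarLen /* assumption: caller passes MAX_VARLEN */) {
   return nMatchOffset != 0 && nMatchOffset <= 32 &&
      nNextMatchOffset == nMatchOffset &&
      (nMatchLen % nMatchOffset) == 0 &&
      (nMatchLen + nNextMatchLen) <= nMaxVarLen;
}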