Increase LZSA2 compression ratio and use the forward parser for -m as well

2025-04-06 05:41:29 +00:00 · 2019-09-23 20:24:50 +02:00 · 2019-09-23 20:24:50 +02:00 · 249b8a4c46
commit 249b8a4c46
parent 74040890fc
5 changed files with 60 additions and 432 deletions
--- a/src/matchfinder.c
+++ b/src/matchfinder.c
@@ -90,7 +90,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
   intervals[0] &= POS_MASK;
   int nMinMatchSize = pCompressor->min_match_size;

-   if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
+   if (pCompressor->format_version >= 2) {
      for (i = 1; i < nInWindowSize - 1; i++) {
         int nIndex = (int)(intervals[i] & POS_MASK);
         int nLen = PLCP[nIndex];
@@ -246,7 +246,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
         int nMatchOffset = (int)(nOffset - match_pos);

         if (nMatchOffset <= MAX_OFFSET) {
-            if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
+            if (pCompressor->format_version >= 2) {
               matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
            }
            else {
--- a/src/shrink_block_v1.c
+++ b/src/shrink_block_v1.c
@@ -156,9 +156,10 @@ static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
 */
-static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
   lzsa_arrival *arrival = pCompressor->arrival;
-   int nMinMatchSize = pCompressor->min_match_size;
+   const int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
   int i, j, n;

   memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));
@@ -178,6 +179,9 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
            nCodingChoiceCost += 8;
         }

+         if (!nFavorRatio && nNumLiterals == 1)
+            nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
         lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT)];
         if (pDestArrival->from_slot == 0 ||
            nCodingChoiceCost <= pDestArrival->cost) {
@@ -216,6 +220,9 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
               int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
               int exists = 0;

+               if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
+                  nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
               for (n = 0;
                  n < 3 && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
                  n++) {
@@ -261,114 +268,6 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
   }
 }

-/**
- * Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
-   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
-   int nLastLiteralsOffset;
-   int nMinMatchSize = pCompressor->min_match_size;
-   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
-   int i;
-
-   cost[nEndOffset - 1] = 8;
-   nLastLiteralsOffset = nEndOffset;
-
-   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
-      int nBestCost, nBestMatchLen, nBestMatchOffset;
-
-      int nLiteralsLen = nLastLiteralsOffset - i;
-      nBestCost = 8 + cost[i + 1];
-      if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
-         /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
-          * The cost automatically accumulates down the chain. */
-         nBestCost += 8;
-      }
-      if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-         nBestCost += MODESWITCH_PENALTY;
-      nBestMatchLen = 0;
-      nBestMatchOffset = 0;
-
-      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-      int m;
-
-      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
-         int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
-
-         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
-            int nCurCost;
-            int nMatchLen = pMatch[m].length;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
-            nCurCost += cost[i + nMatchLen];
-            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-               nCurCost += MODESWITCH_PENALTY;
-
-            if (nBestCost > (nCurCost - nFavorRatio)) {
-               nBestCost = nCurCost;
-               nBestMatchLen = nMatchLen;
-               nBestMatchOffset = pMatch[m].offset;
-            }
-         }
-         else {
-            int nMatchLen = pMatch[m].length;
-            int k, nMatchRunLen;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nMatchRunLen = nMatchLen;
-            if (nMatchRunLen > MATCH_RUN_LEN_V1)
-               nMatchRunLen = MATCH_RUN_LEN_V1;
-
-            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
-               int nCurCost;
-
-               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
-               nCurCost += cost[i + k];
-               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-               }
-            }
-
-            for (; k <= nMatchLen; k++) {
-               int nCurCost;
-
-               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
-               nCurCost += cost[i + k];
-               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-               }
-            }
-         }
-      }
-
-      if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
-         nLastLiteralsOffset = i;
-
-      cost[i] = nBestCost;
-      pMatch->length = nBestMatchLen;
-      pMatch->offset = nBestMatchOffset;
-   }
-}
-
 /**
 * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
 * impacting the compression ratio
@@ -609,10 +508,7 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
 int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   int nResult;

-   if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-      lzsa_optimize_arrivals_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
-   else
-      lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+   lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);

   int nDidReduce;
   int nPasses = 0;
--- a/src/shrink_block_v2.c
+++ b/src/shrink_block_v2.c
@@ -185,7 +185,8 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
 */
 static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset) {
   lzsa_arrival *arrival = pCompressor->arrival;
-   int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+   const int nMinMatchSize = pCompressor->min_match_size;
   int i, j, n;
   lzsa_match match[32];

@@ -211,6 +212,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
            nCodingChoiceCost += 16;
         }

+         if (!nFavorRatio && nNumLiterals == 1)
+            nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
         int exists = 0;
         for (n = 0;
            n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
@@ -279,6 +283,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
               int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
               int exists = 0;

+               if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
+                  nCodingChoiceCost += MODESWITCH_PENALTY*3;
+
               for (n = 0; 
                  n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
                  n++) {
@@ -369,268 +376,6 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
   }
 }

-/**
- * Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_optimize_backward_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
-   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
-   int *prev_match = (int*)pCompressor->intervals; /* Reuse */
-   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
-   int nLastLiteralsOffset;
-   int nMinMatchSize = pCompressor->min_match_size;
-   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
-   int i;
-
-   cost[nEndOffset - 1] = 8;
-   prev_match[nEndOffset - 1] = nEndOffset;
-   nLastLiteralsOffset = nEndOffset;
-
-   pCompressor->best_match[nEndOffset - 1].length = 0;
-   pCompressor->best_match[nEndOffset - 1].offset = 0;
-
-   repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
-   repmatch_opt[nEndOffset - 1].incoming_offset = -1;
-   repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
-
-   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
-      int nLiteralsCost;
-
-      int nLiteralsLen = nLastLiteralsOffset - i;
-      nLiteralsCost = 8 + cost[i + 1];
-
-      /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
-       * The cost automatically accumulates down the chain. */
-      if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
-         nLiteralsCost += 4;
-      }
-      else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
-         nLiteralsCost += 8;
-      }
-      else if (nLiteralsLen == 256) {
-         nLiteralsCost += 16;
-      }
-      if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
-         nLiteralsCost += MODESWITCH_PENALTY;
-
-      const lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-      int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
-      int m;
-
-      cost[i] = nLiteralsCost;
-      pCompressor->best_match[i].length = 0;
-      pCompressor->best_match[i].offset = 0;
-
-      repmatch_opt[i].best_slot_for_incoming = -1;
-      repmatch_opt[i].incoming_offset = -1;
-      repmatch_opt[i].expected_repmatch = 0;
-
-      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
-         int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
-
-         nBestCost = nLiteralsCost;
-         nBestMatchLen = 0;
-         nBestMatchOffset = 0;
-         nBestUpdatedSlot = -1;
-         nBestUpdatedIndex = -1;
-         nBestExpectedRepMatch = 0;
-
-         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
-            int nCurCost;
-            int nMatchLen = pMatch[m].length;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            int nCurIndex = prev_match[i + nMatchLen];
-
-            int nMatchOffsetSize = 0;
-            int nCurExpectedRepMatch = 1;
-            if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-               nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-               nCurExpectedRepMatch = 0;
-            }
-
-            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
-            nCurCost += cost[i + nMatchLen];
-            if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
-               nCurCost += MODESWITCH_PENALTY;
-
-            if (nBestCost > (nCurCost - nFavorRatio)) {
-               nBestCost = nCurCost;
-               nBestMatchLen = nMatchLen;
-               nBestMatchOffset = pMatch[m].offset;
-               nBestUpdatedSlot = -1;
-               nBestUpdatedIndex = -1;
-               nBestExpectedRepMatch = nCurExpectedRepMatch;
-            }
-         }
-         else {
-            int nMatchLen = pMatch[m].length;
-            int k, nMatchRunLen;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nMatchRunLen = nMatchLen;
-            if (nMatchRunLen > MATCH_RUN_LEN_V2)
-               nMatchRunLen = MATCH_RUN_LEN_V2;
-
-            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
-               int nCurCost;
-
-               int nCurIndex = prev_match[i + k];
-               int nMatchOffsetSize = 0;
-               int nCurExpectedRepMatch = 1;
-               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-                  nCurExpectedRepMatch = 0;
-               }
-
-               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
-               nCurCost += cost[i + k];
-               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               int nCurUpdatedSlot = -1;
-               int nCurUpdatedIndex = -1;
-
-               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
-                  int r;
-
-                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
-                     if (pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
-                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
-
-                        if (nAltCost <= nCurCost) {
-                           nCurUpdatedSlot = r;
-                           nCurUpdatedIndex = nCurIndex;
-                           nCurCost = nAltCost;
-                           nCurExpectedRepMatch = 2;
-                        }
-                     }
-                  }
-               }
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-                  nBestUpdatedSlot = nCurUpdatedSlot;
-                  nBestUpdatedIndex = nCurUpdatedIndex;
-                  nBestExpectedRepMatch = nCurExpectedRepMatch;
-               }
-            }
-
-            for (; k <= nMatchLen; k++) {
-               int nCurCost;
-
-               int nCurIndex = prev_match[i + k];
-               int nMatchOffsetSize = 0;
-               int nCurExpectedRepMatch = 1;
-               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-                  nCurExpectedRepMatch = 0;
-               }
-
-               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
-               nCurCost += cost[i + k];
-               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               int nCurUpdatedSlot = -1;
-               int nCurUpdatedIndex = -1;
-
-               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
-                  int r;
-
-                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
-                     if (pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
-                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
-
-                        if (nAltCost <= nCurCost) {
-                           nCurUpdatedSlot = r;
-                           nCurUpdatedIndex = nCurIndex;
-                           nCurCost = nAltCost;
-                           nCurExpectedRepMatch = 2;
-                        }
-                     }
-                  }
-               }
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-                  nBestUpdatedSlot = nCurUpdatedSlot;
-                  nBestUpdatedIndex = nCurUpdatedIndex;
-                  nBestExpectedRepMatch = nCurExpectedRepMatch;
-               }
-            }
-         }
-
-         pSlotCost[m] = nBestCost;         
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m].length = nBestMatchLen;
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m].offset = nBestMatchOffset;
-
-         if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
-            cost[i] = nBestCost;
-            pCompressor->best_match[i].length = nBestMatchLen;
-            pCompressor->best_match[i].offset = nBestMatchOffset;
-
-            repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
-
-            if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
-               repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
-               repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
-            }
-         }
-      }
-      for (; m < NMATCHES_PER_OFFSET; m++) {
-         pSlotCost[m] = 0;
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m] = pMatch[m];
-      }
-
-      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
-         nLastLiteralsOffset = i;
-
-      prev_match[i] = nLastLiteralsOffset;
-   }
-
-   int nIncomingOffset = -1;
-   for (i = nStartOffset; i < nEndOffset; ) {
-      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
-         if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
-            lzsa_match *pMatch = pCompressor->selected_match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
-            int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
-
-            pCompressor->best_match[i].length = pMatch->length;
-            pCompressor->best_match[i].offset = pMatch->offset;
-            cost[i] = *pSlotCost;
-
-            if (repmatch_opt[i].expected_repmatch == 2)
-               repmatch_opt[i].expected_repmatch = 1;
-         }
-         else {
-            if (repmatch_opt[i].expected_repmatch == 2)
-               repmatch_opt[i].expected_repmatch = 0;
-         }
-
-         nIncomingOffset = i;
-         i += pCompressor->best_match[i].length;
-      }
-      else {
-         i++;
-      }
-   }
-}
-
 /**
 * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
 * impacting the compression ratio
@@ -677,6 +422,41 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
                     pMatch->offset = nRepMatchOffset;
               }

+               if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
+                  /* Otherwise, try to gain a match forward as well */
+                  if (i >= pBestMatch[nNextIndex].offset && (i - pBestMatch[nNextIndex].offset + pMatch->length) <= (nEndOffset - LAST_LITERALS)) {
+                     int nMaxLen = 0;
+                     while (nMaxLen < pMatch->length && pInWindow[i - pBestMatch[nNextIndex].offset + nMaxLen] == pInWindow[i - pMatch->offset + nMaxLen])
+                        nMaxLen++;
+                     if (nMaxLen >= pMatch->length) {
+                        /* Replace */
+                        pMatch->offset = pBestMatch[nNextIndex].offset;
+                     }
+                     else if (nMaxLen >= 2 && pMatch->offset != nRepMatchOffset) {
+                        int nPartialSizeBefore, nPartialSizeAfter;
+
+                        nPartialSizeBefore = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
+                        nPartialSizeBefore += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
+                        nPartialSizeBefore += lzsa_get_literals_varlen_size_v2(nNextLiterals);
+
+                        nPartialSizeAfter = lzsa_get_match_varlen_size_v2(nMaxLen - MIN_MATCH_SIZE_V2);
+                        nPartialSizeAfter += lzsa_get_literals_varlen_size_v2(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
+
+                        if (nPartialSizeAfter < nPartialSizeBefore) {
+                           int j;
+
+                           /* We gain a repmatch that is shorter than the original match as this is the best we can do, so it is followed by extra literals, but
+                            * we have calculated that this is shorter */
+                           pMatch->offset = pBestMatch[nNextIndex].offset;
+                           for (j = nMaxLen; j < pMatch->length; j++) {
+                              pBestMatch[i + j].length = 0;
+                           }
+                           pMatch->length = nMaxLen;
+                        }
+                     }
+                  }
+               }
+
               if (pMatch->length < 9 /* Don't waste time considering large matches, they will always win over literals */) {
                  /* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */

@@ -977,10 +757,7 @@ static int lzsa_write_raw_uncompressed_block_v2(lzsa_compressor *pCompressor, co
 int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   int nResult;

-   if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-      lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
-   else
-      lzsa_optimize_backward_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+   lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);

   int nDidReduce;
   int nPasses = 0;
--- a/src/shrink_context.c
+++ b/src/shrink_context.c
@@ -59,10 +59,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
   pCompressor->pos_data = NULL;
   pCompressor->open_intervals = NULL;
   pCompressor->match = NULL;
-   pCompressor->selected_match = NULL;
   pCompressor->best_match = NULL;
-   pCompressor->slot_cost = NULL;
-   pCompressor->repmatch_opt = NULL;
   pCompressor->arrival = NULL;
   pCompressor->min_match_size = nMinMatchSize;
   if (pCompressor->min_match_size < nMinMatchSizeForFormat)
@@ -87,31 +84,14 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
               pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));

               if (pCompressor->match) {
-                  if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-                     pCompressor->arrival = (lzsa_arrival *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_arrival));
+                  pCompressor->arrival = (lzsa_arrival *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_arrival));

-                  if (pCompressor->arrival || (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
+                  if (pCompressor->arrival) {
                     if (pCompressor->format_version == 2) {
                        pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));

                        if (pCompressor->best_match) {
-                           if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
-                              pCompressor->selected_match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
-
-                              if (pCompressor->selected_match) {
-                                 pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
-
-                                 if (pCompressor->slot_cost) {
-                                    pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
-
-                                    if (pCompressor->repmatch_opt)
-                                       return 0;
-                                 }
-                              }
-                           }
-                           else {
-                              return 0;
-                           }
+                           return 0;
                        }
                     }
                     else {
@@ -141,26 +121,11 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
      pCompressor->arrival = NULL;
   }

-   if (pCompressor->repmatch_opt) {
-      free(pCompressor->repmatch_opt);
-      pCompressor->repmatch_opt = NULL;
-   }
-
-   if (pCompressor->slot_cost) {
-      free(pCompressor->slot_cost);
-      pCompressor->slot_cost = NULL;
-   }
-
   if (pCompressor->best_match) {
      free(pCompressor->best_match);
      pCompressor->best_match = NULL;
   }

-   if (pCompressor->selected_match) {
-      free(pCompressor->selected_match);
-      pCompressor->selected_match = NULL;
-   }
-
   if (pCompressor->match) {
      free(pCompressor->match);
      pCompressor->match = NULL;
@@ -207,7 +172,7 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pI
      if (nPreviousBlockSize) {
         lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
      }
-      if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0 || pCompressor->format_version < 2)
+      if (pCompressor->format_version < 2)
         lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);

      if (pCompressor->format_version == 1) {
--- a/src/shrink_context.h
+++ b/src/shrink_context.h
@@ -66,13 +66,6 @@ typedef struct _lzsa_match {
   unsigned short offset;
 } lzsa_match;

-/** One rep-match slot (for LZSA2) */
-typedef struct _lzsa_repmatch_opt {
-   int incoming_offset;
-   short best_slot_for_incoming;
-   short expected_repmatch;
-} lzsa_repmatch_opt;
-
 /** Forward arrival slot */
 typedef struct {
   int cost;
@@ -93,10 +86,7 @@ typedef struct _lzsa_compressor {
   unsigned int *pos_data;
   unsigned int *open_intervals;
   lzsa_match *match;
-   lzsa_match *selected_match;
   lzsa_match *best_match;
-   int *slot_cost;
-   lzsa_repmatch_opt *repmatch_opt;
   lzsa_arrival *arrival;
   int min_match_size;
   int format_version;