diff --git a/src/matchfinder.c b/src/matchfinder.c
index 2138b5d..224a8aa 100644
--- a/src/matchfinder.c
+++ b/src/matchfinder.c
@@ -90,7 +90,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
    intervals[0] &= POS_MASK;
    int nMinMatchSize = pCompressor->min_match_size;
 
-   if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
+   if (pCompressor->format_version >= 2) {
       for (i = 1; i < nInWindowSize - 1; i++) {
          int nIndex = (int)(intervals[i] & POS_MASK);
          int nLen = PLCP[nIndex];
@@ -246,7 +246,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
          int nMatchOffset = (int)(nOffset - match_pos);
 
          if (nMatchOffset <= MAX_OFFSET) {
-            if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) && pCompressor->format_version >= 2) {
+            if (pCompressor->format_version >= 2) {
                matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
             }
             else {
diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c
index 71b2c63..e21b188 100644
--- a/src/shrink_block_v1.c
+++ b/src/shrink_block_v1.c
@@ -156,9 +156,10 @@ static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
  * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
  * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
  */
-static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
+static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
    lzsa_arrival *arrival = pCompressor->arrival;
-   int nMinMatchSize = pCompressor->min_match_size;
+   const int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
    int i, j, n;
 
    memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));
@@ -178,6 +179,9 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
             nCodingChoiceCost += 8;
          }
 
+         if (!nFavorRatio && nNumLiterals == 1)
+            nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
          lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT)];
          if (pDestArrival->from_slot == 0 ||
             nCodingChoiceCost <= pDestArrival->cost) {
@@ -216,6 +220,9 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
                int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
                int exists = 0;
 
+               if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
+                  nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
                for (n = 0;
                   n < 3 && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + k) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
                   n++) {
@@ -261,114 +268,6 @@ static void lzsa_optimize_arrivals_v1(lzsa_compressor *pCompressor, const int nS
    }
 }
 
-/**
- * Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_optimize_matches_v1(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
-   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
-   int nLastLiteralsOffset;
-   int nMinMatchSize = pCompressor->min_match_size;
-   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
-   int i;
-
-   cost[nEndOffset - 1] = 8;
-   nLastLiteralsOffset = nEndOffset;
-
-   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
-      int nBestCost, nBestMatchLen, nBestMatchOffset;
-
-      int nLiteralsLen = nLastLiteralsOffset - i;
-      nBestCost = 8 + cost[i + 1];
-      if (nLiteralsLen == LITERALS_RUN_LEN_V1 || nLiteralsLen == 256 || nLiteralsLen == 512) {
-         /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
-          * The cost automatically accumulates down the chain. */
-         nBestCost += 8;
-      }
-      if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-         nBestCost += MODESWITCH_PENALTY;
-      nBestMatchLen = 0;
-      nBestMatchOffset = 0;
-
-      lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-      int m;
-
-      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
-         int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
-
-         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
-            int nCurCost;
-            int nMatchLen = pMatch[m].length;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);
-            nCurCost += cost[i + nMatchLen];
-            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-               nCurCost += MODESWITCH_PENALTY;
-
-            if (nBestCost > (nCurCost - nFavorRatio)) {
-               nBestCost = nCurCost;
-               nBestMatchLen = nMatchLen;
-               nBestMatchOffset = pMatch[m].offset;
-            }
-         }
-         else {
-            int nMatchLen = pMatch[m].length;
-            int k, nMatchRunLen;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nMatchRunLen = nMatchLen;
-            if (nMatchRunLen > MATCH_RUN_LEN_V1)
-               nMatchRunLen = MATCH_RUN_LEN_V1;
-
-            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
-               int nCurCost;
-
-               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
-               nCurCost += cost[i + k];
-               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-               }
-            }
-
-            for (; k <= nMatchLen; k++) {
-               int nCurCost;
-
-               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
-               nCurCost += cost[i + k];
-               if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-               }
-            }
-         }
-      }
-
-      if (nBestMatchLen >= MIN_MATCH_SIZE_V1)
-         nLastLiteralsOffset = i;
-
-      cost[i] = nBestCost;
-      pMatch->length = nBestMatchLen;
-      pMatch->offset = nBestMatchOffset;
-   }
-}
-
 /**
  * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
  * impacting the compression ratio
@@ -609,10 +508,7 @@ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, co
 int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
    int nResult;
 
-   if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-      lzsa_optimize_arrivals_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
-   else
-      lzsa_optimize_matches_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+   lzsa_optimize_forward_v1(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
 
    int nDidReduce;
    int nPasses = 0;
diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c
index ebdf735..a1e2a59 100644
--- a/src/shrink_block_v2.c
+++ b/src/shrink_block_v2.c
@@ -185,7 +185,8 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
  */
 static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset) {
    lzsa_arrival *arrival = pCompressor->arrival;
-   int nMinMatchSize = pCompressor->min_match_size;
+   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
+   const int nMinMatchSize = pCompressor->min_match_size;
    int i, j, n;
    lzsa_match match[32];
 
@@ -211,6 +212,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
             nCodingChoiceCost += 16;
          }
 
+         if (!nFavorRatio && nNumLiterals == 1)
+            nCodingChoiceCost += MODESWITCH_PENALTY * 3;
+
          int exists = 0;
          for (n = 0;
             n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
@@ -279,6 +283,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
                int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
                int exists = 0;
 
+               if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
+                  nCodingChoiceCost += MODESWITCH_PENALTY*3;
+
                for (n = 0; 
                   n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
                   n++) {
@@ -369,268 +376,6 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
    }
 }
 
-/**
- * Attempt to pick optimal matches using a backward LZSS style parser, so as to produce the smallest possible output that decompresses to the same input
- *
- * @param pCompressor compression context
- * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
- * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes
- */
-static void lzsa_optimize_backward_v2(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
-   int *cost = (int*)pCompressor->pos_data;  /* Reuse */
-   int *prev_match = (int*)pCompressor->intervals; /* Reuse */
-   lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
-   int nLastLiteralsOffset;
-   int nMinMatchSize = pCompressor->min_match_size;
-   const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
-   int i;
-
-   cost[nEndOffset - 1] = 8;
-   prev_match[nEndOffset - 1] = nEndOffset;
-   nLastLiteralsOffset = nEndOffset;
-
-   pCompressor->best_match[nEndOffset - 1].length = 0;
-   pCompressor->best_match[nEndOffset - 1].offset = 0;
-
-   repmatch_opt[nEndOffset - 1].best_slot_for_incoming = -1;
-   repmatch_opt[nEndOffset - 1].incoming_offset = -1;
-   repmatch_opt[nEndOffset - 1].expected_repmatch = 0;
-
-   for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
-      int nLiteralsCost;
-
-      int nLiteralsLen = nLastLiteralsOffset - i;
-      nLiteralsCost = 8 + cost[i + 1];
-
-      /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
-       * The cost automatically accumulates down the chain. */
-      if (nLiteralsLen == LITERALS_RUN_LEN_V2) {
-         nLiteralsCost += 4;
-      }
-      else if (nLiteralsLen == (LITERALS_RUN_LEN_V2 + 15)) {
-         nLiteralsCost += 8;
-      }
-      else if (nLiteralsLen == 256) {
-         nLiteralsCost += 16;
-      }
-      if (pCompressor->best_match[i + 1].length >= MIN_MATCH_SIZE_V2)
-         nLiteralsCost += MODESWITCH_PENALTY;
-
-      const lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
-      int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT);
-      int m;
-
-      cost[i] = nLiteralsCost;
-      pCompressor->best_match[i].length = 0;
-      pCompressor->best_match[i].offset = 0;
-
-      repmatch_opt[i].best_slot_for_incoming = -1;
-      repmatch_opt[i].incoming_offset = -1;
-      repmatch_opt[i].expected_repmatch = 0;
-
-      for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
-         int nBestCost, nBestMatchLen, nBestMatchOffset, nBestUpdatedSlot, nBestUpdatedIndex, nBestExpectedRepMatch;
-
-         nBestCost = nLiteralsCost;
-         nBestMatchLen = 0;
-         nBestMatchOffset = 0;
-         nBestUpdatedSlot = -1;
-         nBestUpdatedIndex = -1;
-         nBestExpectedRepMatch = 0;
-
-         if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
-            int nCurCost;
-            int nMatchLen = pMatch[m].length;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            int nCurIndex = prev_match[i + nMatchLen];
-
-            int nMatchOffsetSize = 0;
-            int nCurExpectedRepMatch = 1;
-            if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-               nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-               nCurExpectedRepMatch = 0;
-            }
-
-            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(nMatchLen - MIN_MATCH_SIZE_V2);
-            nCurCost += cost[i + nMatchLen];
-            if (pCompressor->best_match[i + nMatchLen].length >= MIN_MATCH_SIZE_V2)
-               nCurCost += MODESWITCH_PENALTY;
-
-            if (nBestCost > (nCurCost - nFavorRatio)) {
-               nBestCost = nCurCost;
-               nBestMatchLen = nMatchLen;
-               nBestMatchOffset = pMatch[m].offset;
-               nBestUpdatedSlot = -1;
-               nBestUpdatedIndex = -1;
-               nBestExpectedRepMatch = nCurExpectedRepMatch;
-            }
-         }
-         else {
-            int nMatchLen = pMatch[m].length;
-            int k, nMatchRunLen;
-
-            if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
-               nMatchLen = nEndOffset - LAST_LITERALS - i;
-
-            nMatchRunLen = nMatchLen;
-            if (nMatchRunLen > MATCH_RUN_LEN_V2)
-               nMatchRunLen = MATCH_RUN_LEN_V2;
-
-            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
-               int nCurCost;
-
-               int nCurIndex = prev_match[i + k];
-               int nMatchOffsetSize = 0;
-               int nCurExpectedRepMatch = 1;
-               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-                  nCurExpectedRepMatch = 0;
-               }
-
-               nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
-               nCurCost += cost[i + k];
-               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               int nCurUpdatedSlot = -1;
-               int nCurUpdatedIndex = -1;
-
-               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
-                  int r;
-
-                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
-                     if (pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
-                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
-
-                        if (nAltCost <= nCurCost) {
-                           nCurUpdatedSlot = r;
-                           nCurUpdatedIndex = nCurIndex;
-                           nCurCost = nAltCost;
-                           nCurExpectedRepMatch = 2;
-                        }
-                     }
-                  }
-               }
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-                  nBestUpdatedSlot = nCurUpdatedSlot;
-                  nBestUpdatedIndex = nCurUpdatedIndex;
-                  nBestExpectedRepMatch = nCurExpectedRepMatch;
-               }
-            }
-
-            for (; k <= nMatchLen; k++) {
-               int nCurCost;
-
-               int nCurIndex = prev_match[i + k];
-               int nMatchOffsetSize = 0;
-               int nCurExpectedRepMatch = 1;
-               if (nCurIndex >= nEndOffset || pCompressor->best_match[nCurIndex].length < MIN_MATCH_SIZE_V2 ||
-                  pCompressor->best_match[nCurIndex].offset != pMatch[m].offset) {
-                  nMatchOffsetSize = (pMatch[m].offset <= 32) ? 4 : ((pMatch[m].offset <= 512) ? 8 : ((pMatch[m].offset <= (8192 + 512)) ? 12 : 16));
-                  nCurExpectedRepMatch = 0;
-               }
-
-               nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
-               nCurCost += cost[i + k];
-               if (pCompressor->best_match[i + k].length >= MIN_MATCH_SIZE_V2)
-                  nCurCost += MODESWITCH_PENALTY;
-
-               int nCurUpdatedSlot = -1;
-               int nCurUpdatedIndex = -1;
-
-               if (nMatchOffsetSize && nCurIndex < nEndOffset && pCompressor->best_match[nCurIndex].length >= MIN_MATCH_SIZE_V2 && !repmatch_opt[nCurIndex].expected_repmatch) {
-                  int r;
-
-                  for (r = 0; r < NMATCHES_PER_OFFSET && pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].length >= MIN_MATCH_SIZE_V2; r++) {
-                     if (pCompressor->selected_match[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r].offset == pMatch[m].offset) {
-                        int nAltCost = nCurCost - nMatchOffsetSize + pCompressor->slot_cost[(nCurIndex << MATCHES_PER_OFFSET_SHIFT) + r] - cost[nCurIndex];
-
-                        if (nAltCost <= nCurCost) {
-                           nCurUpdatedSlot = r;
-                           nCurUpdatedIndex = nCurIndex;
-                           nCurCost = nAltCost;
-                           nCurExpectedRepMatch = 2;
-                        }
-                     }
-                  }
-               }
-
-               if (nBestCost > (nCurCost - nFavorRatio)) {
-                  nBestCost = nCurCost;
-                  nBestMatchLen = k;
-                  nBestMatchOffset = pMatch[m].offset;
-                  nBestUpdatedSlot = nCurUpdatedSlot;
-                  nBestUpdatedIndex = nCurUpdatedIndex;
-                  nBestExpectedRepMatch = nCurExpectedRepMatch;
-               }
-            }
-         }
-
-         pSlotCost[m] = nBestCost;         
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m].length = nBestMatchLen;
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m].offset = nBestMatchOffset;
-
-         if (m == 0 || (nBestMatchLen && cost[i] >= nBestCost)) {
-            cost[i] = nBestCost;
-            pCompressor->best_match[i].length = nBestMatchLen;
-            pCompressor->best_match[i].offset = nBestMatchOffset;
-
-            repmatch_opt[i].expected_repmatch = nBestExpectedRepMatch;
-
-            if (nBestUpdatedSlot >= 0 && nBestUpdatedIndex >= 0) {
-               repmatch_opt[nBestUpdatedIndex].best_slot_for_incoming = nBestUpdatedSlot;
-               repmatch_opt[nBestUpdatedIndex].incoming_offset = i;
-            }
-         }
-      }
-      for (; m < NMATCHES_PER_OFFSET; m++) {
-         pSlotCost[m] = 0;
-         pCompressor->selected_match[(i << MATCHES_PER_OFFSET_SHIFT) + m] = pMatch[m];
-      }
-
-      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2)
-         nLastLiteralsOffset = i;
-
-      prev_match[i] = nLastLiteralsOffset;
-   }
-
-   int nIncomingOffset = -1;
-   for (i = nStartOffset; i < nEndOffset; ) {
-      if (pCompressor->best_match[i].length >= MIN_MATCH_SIZE_V2) {
-         if (nIncomingOffset >= 0 && repmatch_opt[i].incoming_offset == nIncomingOffset && repmatch_opt[i].best_slot_for_incoming >= 0) {
-            lzsa_match *pMatch = pCompressor->selected_match + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
-            int *pSlotCost = pCompressor->slot_cost + (i << MATCHES_PER_OFFSET_SHIFT) + repmatch_opt[i].best_slot_for_incoming;
-
-            pCompressor->best_match[i].length = pMatch->length;
-            pCompressor->best_match[i].offset = pMatch->offset;
-            cost[i] = *pSlotCost;
-
-            if (repmatch_opt[i].expected_repmatch == 2)
-               repmatch_opt[i].expected_repmatch = 1;
-         }
-         else {
-            if (repmatch_opt[i].expected_repmatch == 2)
-               repmatch_opt[i].expected_repmatch = 0;
-         }
-
-         nIncomingOffset = i;
-         i += pCompressor->best_match[i].length;
-      }
-      else {
-         i++;
-      }
-   }
-}
-
 /**
  * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without
  * impacting the compression ratio
@@ -677,6 +422,41 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
                      pMatch->offset = nRepMatchOffset;
                }
 
+               if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
+                  /* Otherwise, try to reuse the next match's offset for this match, so that the next match can become a repmatch */
+                  if (i >= pBestMatch[nNextIndex].offset && (i - pBestMatch[nNextIndex].offset + pMatch->length) <= (nEndOffset - LAST_LITERALS)) {
+                     int nMaxLen = 0;
+                     while (nMaxLen < pMatch->length && pInWindow[i - pBestMatch[nNextIndex].offset + nMaxLen] == pInWindow[i - pMatch->offset + nMaxLen])
+                        nMaxLen++;
+                     if (nMaxLen >= pMatch->length) {
+                        /* Replace */
+                        pMatch->offset = pBestMatch[nNextIndex].offset;
+                     }
+                     else if (nMaxLen >= 2 && pMatch->offset != nRepMatchOffset) {
+                        int nPartialSizeBefore, nPartialSizeAfter;
+
+                        nPartialSizeBefore = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
+                        nPartialSizeBefore += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
+                        nPartialSizeBefore += lzsa_get_literals_varlen_size_v2(nNextLiterals);
+
+                        nPartialSizeAfter = lzsa_get_match_varlen_size_v2(nMaxLen - MIN_MATCH_SIZE_V2);
+                        nPartialSizeAfter += lzsa_get_literals_varlen_size_v2(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
+
+                        if (nPartialSizeAfter < nPartialSizeBefore) {
+                           int j;
+
+                           /* The next match's offset only reproduces the first nMaxLen bytes of this match. Truncate the match to that length to gain a
+                            * repmatch; the bytes cut off are counted as extra literals, and the size estimate above says the result is still smaller */
+                           pMatch->offset = pBestMatch[nNextIndex].offset;
+                           for (j = nMaxLen; j < pMatch->length; j++) {
+                              pBestMatch[i + j].length = 0;
+                           }
+                           pMatch->length = nMaxLen;
+                        }
+                     }
+                  }
+               }
+
                if (pMatch->length < 9 /* Don't waste time considering large matches, they will always win over literals */) {
                   /* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
 
@@ -977,10 +757,7 @@ static int lzsa_write_raw_uncompressed_block_v2(lzsa_compressor *pCompressor, co
 int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
    int nResult;
 
-   if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-      lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
-   else
-      lzsa_optimize_backward_v2(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
+   lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
 
    int nDidReduce;
    int nPasses = 0;
diff --git a/src/shrink_context.c b/src/shrink_context.c
index 1f9b6d7..dc77bdd 100644
--- a/src/shrink_context.c
+++ b/src/shrink_context.c
@@ -59,10 +59,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
    pCompressor->pos_data = NULL;
    pCompressor->open_intervals = NULL;
    pCompressor->match = NULL;
-   pCompressor->selected_match = NULL;
    pCompressor->best_match = NULL;
-   pCompressor->slot_cost = NULL;
-   pCompressor->repmatch_opt = NULL;
    pCompressor->arrival = NULL;
    pCompressor->min_match_size = nMinMatchSize;
    if (pCompressor->min_match_size < nMinMatchSizeForFormat)
@@ -87,31 +84,14 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
                pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
 
                if (pCompressor->match) {
-                  if (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO)
-                     pCompressor->arrival = (lzsa_arrival *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_arrival));
+                  pCompressor->arrival = (lzsa_arrival *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_arrival));
 
-                  if (pCompressor->arrival || (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
+                  if (pCompressor->arrival) {
                      if (pCompressor->format_version == 2) {
                         pCompressor->best_match = (lzsa_match *)malloc(nMaxWindowSize * sizeof(lzsa_match));
 
                         if (pCompressor->best_match) {
-                           if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0) {
-                              pCompressor->selected_match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
-
-                              if (pCompressor->selected_match) {
-                                 pCompressor->slot_cost = (int *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(int));
-
-                                 if (pCompressor->slot_cost) {
-                                    pCompressor->repmatch_opt = (lzsa_repmatch_opt *)malloc(nMaxWindowSize * sizeof(lzsa_repmatch_opt));
-
-                                    if (pCompressor->repmatch_opt)
-                                       return 0;
-                                 }
-                              }
-                           }
-                           else {
-                              return 0;
-                           }
+                           return 0;
                         }
                      }
                      else {
@@ -141,26 +121,11 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
       pCompressor->arrival = NULL;
    }
 
-   if (pCompressor->repmatch_opt) {
-      free(pCompressor->repmatch_opt);
-      pCompressor->repmatch_opt = NULL;
-   }
-
-   if (pCompressor->slot_cost) {
-      free(pCompressor->slot_cost);
-      pCompressor->slot_cost = NULL;
-   }
-
    if (pCompressor->best_match) {
       free(pCompressor->best_match);
       pCompressor->best_match = NULL;
    }
 
-   if (pCompressor->selected_match) {
-      free(pCompressor->selected_match);
-      pCompressor->selected_match = NULL;
-   }
-
    if (pCompressor->match) {
       free(pCompressor->match);
       pCompressor->match = NULL;
@@ -207,7 +172,7 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pI
       if (nPreviousBlockSize) {
          lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
       }
-      if ((pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) == 0 || pCompressor->format_version < 2)
+      if (pCompressor->format_version < 2)
          lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
 
       if (pCompressor->format_version == 1) {
diff --git a/src/shrink_context.h b/src/shrink_context.h
index 9f99815..6fdfb78 100644
--- a/src/shrink_context.h
+++ b/src/shrink_context.h
@@ -66,13 +66,6 @@ typedef struct _lzsa_match {
    unsigned short offset;
 } lzsa_match;
 
-/** One rep-match slot (for LZSA2) */
-typedef struct _lzsa_repmatch_opt {
-   int incoming_offset;
-   short best_slot_for_incoming;
-   short expected_repmatch;
-} lzsa_repmatch_opt;
-
 /** Forward arrival slot */
 typedef struct {
    int cost;
@@ -93,10 +86,7 @@ typedef struct _lzsa_compressor {
    unsigned int *pos_data;
    unsigned int *open_intervals;
    lzsa_match *match;
-   lzsa_match *selected_match;
    lzsa_match *best_match;
-   int *slot_cost;
-   lzsa_repmatch_opt *repmatch_opt;
    lzsa_arrival *arrival;
    int min_match_size;
    int format_version;