From 82f03b55e39145380fc381c8491fc6384b9d35c1 Mon Sep 17 00:00:00 2001
From: Emmanuel Marty <emmanuel@fgl.com>
Date: Thu, 2 Feb 2023 11:11:14 +0100
Subject: [PATCH] Faster LZSA1 compression

---
 src/shrink_block_v1.c | 67 +++++++++++++++++++------------------------
 1 file changed, 29 insertions(+), 38 deletions(-)

diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c
index f47afaf..f5d1afb 100644
--- a/src/shrink_block_v1.c
+++ b/src/shrink_block_v1.c
@@ -219,23 +219,23 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nSt
       const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
       const int nNumArrivalsForThisPos = j;
 
-      for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
-         int nMatchLen = match[m].length;
-         const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
-         int nStartingMatchLen, k;
+      if (nNumArrivalsForThisPos != 0) {
+         for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
+            int nMatchLen = match[m].length;
+            const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
+            int nStartingMatchLen, k;
 
-         if ((i + nMatchLen) > nEndOffset)
-            nMatchLen = nEndOffset - i;
+            if ((i + nMatchLen) > nEndOffset)
+               nMatchLen = nEndOffset - i;
 
-         if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
-            nStartingMatchLen = nMatchLen;
-         else
-            nStartingMatchLen = nMinMatchSize;
-         for (k = nStartingMatchLen; k <= nMatchLen; k++) {
-            const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
+            if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
+               nStartingMatchLen = nMatchLen;
+            else
+               nStartingMatchLen = nMinMatchSize;
+            for (k = nStartingMatchLen; k <= nMatchLen; k++) {
+               const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
 
-            if (nNumArrivalsForThisPos != 0) {
-               lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT_V1];
+               lzsa_arrival* pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V1];
                int nCodingChoiceCost = cur_arrival[0].cost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
                int exists = 0, n;
 
@@ -253,31 +253,22 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nSt
 
                if (!exists) {
                   const int nScore = cur_arrival[0].score + 5;
-                  int nNonRepMatchIdx = -1;
 
-                  for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
-                     if (nCodingChoiceCost < pDestSlots[n].cost ||
-                        (nCodingChoiceCost == pDestSlots[n].cost && nScore < (pDestSlots[n].score + nDisableScore))) {
-                        nNonRepMatchIdx = n;
-                        break;
-                     }
+                  if (nCodingChoiceCost < pDestSlots[0].cost ||
+                     (nCodingChoiceCost == pDestSlots[0].cost && nScore < (pDestSlots[0].score + nDisableScore))) {
+                     memmove(&pDestSlots[1],
+                        &pDestSlots[0],
+                        sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - 1));
+
+                     pDestSlots->cost = nCodingChoiceCost;
+                     pDestSlots->rep_offset = match[m].offset;
+                     pDestSlots->from_slot = 1;
+                     pDestSlots->from_pos = i - nStartOffset;
+                     pDestSlots->match_len = k;
+                     pDestSlots->num_literals = 0;
+                     pDestSlots->score = nScore;
                   }
-
-                  if (nNonRepMatchIdx >= 0) {
-                     memmove(&pDestSlots[nNonRepMatchIdx + 1],
-                        &pDestSlots[nNonRepMatchIdx],
-                        sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - nNonRepMatchIdx - 1));
-
-                     lzsa_arrival* pDestArrival = &pDestSlots[nNonRepMatchIdx];
-                     pDestArrival->cost = nCodingChoiceCost;
-                     pDestArrival->rep_offset = match[m].offset;
-                     pDestArrival->from_slot = 1;
-                     pDestArrival->from_pos = i - nStartOffset;
-                     pDestArrival->match_len = k;
-                     pDestArrival->num_literals = 0;
-                     pDestArrival->score = nScore;
-                  }
-               }                             
+               }
             }
          }
       }
@@ -375,7 +366,7 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
                pBestMatch[i + pMatch->length].length)) {
 
             int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
-            nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
+            nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v1(0) + */ ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
 
             const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);