From a7850104488de06935400ca61c4d290ccee3a5a9 Mon Sep 17 00:00:00 2001
From: emmanuel-marty <brothers@puzzlebrothers.com>
Date: Fri, 5 Apr 2019 23:16:05 +0200
Subject: [PATCH] Revert token to O|LLL|MMMM; revert to always shifting the
 match offset by 1; set raw block end marker as a large zero-size match

---
 README.md                   | 16 +++++++-------
 asm/8088/decompress_small.S | 17 ++++++++-------
 src/expand.c                | 22 +++++++------------
 src/main.c                  | 42 +++++++++++++++++++++++--------------
 src/shrink.c                | 15 ++++++-------
 5 files changed, 56 insertions(+), 56 deletions(-)

diff --git a/README.md b/README.md
index 47a09e5..0c39ee2 100755
--- a/README.md
+++ b/README.md
@@ -75,7 +75,7 @@ Each frame contains a 3-bytes length followed by block data that expands to up t
 
 LZSA blocks are composed from consecutive commands. Each command follows this format:
 
-* token: <LLL|MMMM|O>
+* token: <O|LLL|MMMM>
 * optional extra literal length
 * literal values
 * match offset low
@@ -87,7 +87,7 @@ LZSA blocks are composed from consecutive commands. Each command follows this fo
 The token byte is broken down into three parts:
 
     7 6 5 4 3 2 1 0
-    L L L M M M M O
+    O L L L M M M M
 
 * L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
 * M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.
@@ -113,21 +113,19 @@ The low 8 bits of the match offset follows.
 
 **optional match offset high**
 
-If the 'O' bit (bit 0) is set in the token, the high 8 bits of the match offset follow, otherwise they are understood to be all set to 0.
+If the 'O' bit (bit 7) is set in the token, the high 8 bits of the match offset follow, otherwise they are understood to be all set to 0.
 
-**important note regarding short match offsets: off by 1**
+**important note regarding match offsets: off by 1**
 
-Note that the match offset is *off by 1* when encoded as a single byte (the O bit in the token is set to 0): a value of 0 refers to the byte preceding the current output index (N-1). A value of 1 refers to two bytes before the current output index (N-2) and so on. This is so that match offsets up to 256 can be encoded as a single byte, for extra compression.
-
-When match offsets are encoded as two bytes (the O bit in the token is set to 1), they are stored directly: a value of 1 refers to the byte preceding the current output index (N-1), and so on.
+Note that the match offset is *off by 1*: a value of 0 refers to the byte preceding the current output index (N-1). A value of 1 refers to two bytes before the current output index (N-2) and so on. This is so that match offsets up to 256 can be encoded as a single byte, for extra compression.
 
 **optional extra encoded match length**
 
 If the encoded match length is 15 or more, the 'M' bits in the token form the value 15, and an extra byte follows here, with three possible types of value.
 
-* 0-253: the value is added to the 15 stored in the token.
+* 0-253: the value is added to the 15 stored in the token. The final encoded match length is 15 + this byte, which gives an actual match length of 3 + 15 + this byte.
 * 254: a second byte follows. The final encoded match length is 15 + 254 + the second byte, which gives an actual match length of 3 + 15 + 254 + the second byte.
-* 255: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value.
+* 255: a second and third byte follow, forming a little-endian 16-bit value. The final encoded match length is that 16-bit value, which gives an actual match length of 3 + that 16-bit value.
 
 # Footer format
 
diff --git a/asm/8088/decompress_small.S b/asm/8088/decompress_small.S
index 0e8c7b7..4ae6a75 100755
--- a/asm/8088/decompress_small.S
+++ b/asm/8088/decompress_small.S
@@ -41,9 +41,9 @@ lzsa_decompress:
    lodsb                   ; read token byte: LLL|MMMM|O
    mov dx,ax               ; keep token in dl
    
-   mov cl,3
-   rol al,cl               ; shift literals length into place
-   and al,07H              ; isolate literals length in token (LLL)
+   and al,070H             ; isolate literals length in token (LLL)
+   mov cl,4
+   shr al,cl               ; shift literals length into place
 
    mov cx,ax               ; copy literals length into cx
    cmp al,07H              ; LITERALS_RUN_LEN?
@@ -54,20 +54,18 @@ lzsa_decompress:
 .copy_literals:
    rep movsb               ; copy cx literals from ds:si to es:di
 
-   ror dl,1                ; check match offset size in token (O bit)
-   jc .get_long_offset
+   test dl,dl              ; check match offset size in token (O bit)
+   js .get_long_offset
 
    xchg ax,cx              ; clear ah - cx is zero from the rep movsb above
    lodsb
-   inc ax                  ; the match offset is stored off-by-1, increase it
    jmp short .get_match_length
 
 .get_long_offset:
    lodsw                   ; Get 2-byte match offset
-   test ax,ax
-   je short .done_decompressing ; bail if we hit EOD
 
 .get_match_length:
+   inc ax                  ; the match offset is stored off-by-1, increase it
    xchg ax,dx              ; dx: match offset  ax: original token
    and al,0FH              ; isolate match length in token (MMMM)
 
@@ -76,6 +74,8 @@ lzsa_decompress:
    jne .copy_match         ; no, we have the full match length from the token, go copy
    
    call .get_varlen        ; get complete match length
+   test cx,cx
+   je short .done_decompressing ; bail if we hit EOD
 
 .copy_match:
    add cx,3                ; add MIN_MATCH_SIZE to get the final match length to copy
@@ -113,4 +113,5 @@ lzsa_decompress:
 
 .large_varlen:
    lodsw                   ; grab 16-bit extra length
+   mov cx,ax
    ret
diff --git a/src/expand.c b/src/expand.c
index 02f5a3e..68b2687 100755
--- a/src/expand.c
+++ b/src/expand.c
@@ -168,7 +168,7 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
 
    while (pInBlock < pInBlockFastEnd && pCurOutData < pOutDataFastEnd) {
       const unsigned char token = *pInBlock++;
-      int nLiterals = (int)((unsigned int)((token & 0xe0) >> 5));
+      int nLiterals = (int)((unsigned int)((token & 0x70) >> 4));
 
       if (nLiterals < LITERALS_RUN_LEN) {
          memcpy(pCurOutData, pInBlock, 8);
@@ -184,20 +184,17 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
          int nMatchOffset;
 
          nMatchOffset = ((unsigned int)*pInBlock++);
-         if (token & 0x01) {
+         if (token & 0x80) {
             if (pInBlock >= pInBlockEnd) return -1;
             nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
-            if (nMatchOffset == 0) break;
-         }
-         else {
-            nMatchOffset++;
          }
+         nMatchOffset++;
 
          const unsigned char *pSrc = pCurOutData - nMatchOffset;
          if (pSrc < pOutData)
             return -1;
 
-         int nMatchLen = (int)((unsigned int)((token & 0x1e) >> 1));
+         int nMatchLen = (int)((unsigned int)(token & 0x0f));
          if (nMatchLen < (16 - MIN_MATCH_SIZE + 1) && (pSrc + MIN_MATCH_SIZE + nMatchLen) < pCurOutData && pCurOutData < pOutDataFastEnd) {
             memcpy(pCurOutData, pSrc, 16);
             pCurOutData += (MIN_MATCH_SIZE + nMatchLen);
@@ -213,7 +210,7 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
 
    while (pInBlock < pInBlockEnd) {
       const unsigned char token = *pInBlock++;
-      int nLiterals = (int)((unsigned int)((token & 0xe0) >> 5));
+      int nLiterals = (int)((unsigned int)((token & 0x70) >> 4));
 
       if (lzsa_expand_literals_slow(&pInBlock, pInBlockEnd, nLiterals, &pCurOutData, pOutDataEnd))
          return -1;
@@ -222,20 +219,17 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
          int nMatchOffset;
 
          nMatchOffset = ((unsigned int)*pInBlock++);
-         if (token & 0x01) {
+         if (token & 0x80) {
             if (pInBlock >= pInBlockEnd) return -1;
             nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
-            if (nMatchOffset == 0) break;
-         }
-         else {
-            nMatchOffset++;
          }
+         nMatchOffset++;
 
          const unsigned char *pSrc = pCurOutData - nMatchOffset;
          if (pSrc < pOutData)
             return -1;
 
-         int nMatchLen = (int)((unsigned int)((token & 0x1e) >> 1));
+         int nMatchLen = (int)((unsigned int)(token & 0x0f));
          if (lzsa_expand_match_slow(&pInBlock, pInBlockEnd, pSrc, nMatchLen, &pCurOutData, pOutDataEnd, pOutDataFastEnd))
             return -1;
       }
diff --git a/src/main.c b/src/main.c
index dd4b243..72116c1 100755
--- a/src/main.c
+++ b/src/main.c
@@ -223,16 +223,26 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
       }
    }
 
-   unsigned char cFooter[3];
-   int nFooterSize = ((nOptions & OPT_RAW) == 0) ? 3 : 2;
+   unsigned char cFooter[4];
+   int nFooterSize;
 
-   cFooter[0] = 0x00;         /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
-   cFooter[1] = 0x00;
-   cFooter[2] = 0x00;
+   if ((nOptions & OPT_RAW) != 0) {
+      cFooter[0] = 0x00;         /* EOD marker for raw block */
+      cFooter[1] = 0xff;         
+      cFooter[2] = 0x00;
+      cFooter[3] = 0x00;
+      nFooterSize = 4;
+   }
+   else {
+      cFooter[0] = 0x00;         /* EOD frame */
+      cFooter[1] = 0x00;
+      cFooter[2] = 0x00;
+      nFooterSize = 3;
+   }
 
    if (!bError)
       bError = fwrite(cFooter, 1, nFooterSize, f_out) != nFooterSize;
-   nCompressedSize += (long long) nFooterSize;
+   nCompressedSize += (long long)nFooterSize;
 
    if (!bError && (nOptions & OPT_VERBOSE)) {
       nEndTime = lzsa_get_time();
@@ -305,7 +315,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
       nFileSize = (unsigned int)ftell(pInFile);
       fseek(pInFile, 0, SEEK_SET);
 
-      if (nFileSize < 2) {
+      if (nFileSize < 4) {
          fclose(pInFile);
          pInFile = NULL;
          fprintf(stderr, "invalid file size for raw block mode\n");
@@ -371,15 +381,15 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
                (((unsigned int)cBlockSize[2]) << 16);
          }
          else {
-            nBlockSize = 0;
+            nBlockSize = 0xffffff;
          }
       }
       else {
-         nBlockSize = nFileSize - 2;
-         nFileSize = 0;
+         nBlockSize = nFileSize - 4;
+         nFileSize = 0xffffff;
       }
 
-      if (nBlockSize != 0) {
+      if ((nBlockSize & 0x400000) == 0) {
          bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
          int nDecompressedSize = 0;
 
@@ -483,7 +493,7 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
       nFileSize = (unsigned int)ftell(pInFile);
       fseek(pInFile, 0, SEEK_SET);
 
-      if (nFileSize < 2) {
+      if (nFileSize < 4) {
          fclose(pInFile);
          pInFile = NULL;
          fprintf(stderr, "invalid file size for raw block mode\n");
@@ -571,15 +581,15 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
                (((unsigned int)cBlockSize[2]) << 16);
          }
          else {
-            nBlockSize = 0;
+            nBlockSize = 0xffffff;
          }
       }
       else {
-         nBlockSize = nFileSize - 2;
-         nFileSize = 0;
+         nBlockSize = nFileSize - 4;
+         nFileSize = 0xffffff;
       }
 
-      if (nBlockSize != 0) {
+      if ((nBlockSize & 0x400000) == 0) {
          bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
          int nDecompressedSize = 0;
 
diff --git a/src/shrink.c b/src/shrink.c
index 00d0bc3..37c12d9 100755
--- a/src/shrink.c
+++ b/src/shrink.c
@@ -578,7 +578,7 @@ static void lzsa_optimize_command_count(lsza_compressor *pCompressor, const int
          int nMatchOffset = pMatch->offset;
          int nMatchLen = pMatch->length;
          int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
-         int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x01;
+         int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
          int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNibbleLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
 
          if ((((nNumLiterals + nMatchLen) < LITERALS_RUN_LEN && nTokenSize >= nMatchLen) || 
@@ -631,7 +631,7 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
          int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
          int nNibbleLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
          int nNibbleMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? MATCH_RUN_LEN : nEncodedMatchLen;
-         int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x01;
+         int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
          int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + nNumLiterals + (nNibbleLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
 
          if ((nOutOffset + nTokenSize) > nMaxOutDataSize)
@@ -639,7 +639,7 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
          if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
             return -1;
 
-         pOutData[nOutOffset++] = (nNibbleLiteralsLen << 5) | (nNibbleMatchLen << 1) | nNibbleLongOffset;
+         pOutData[nOutOffset++] = nNibbleLongOffset | (nNibbleLiteralsLen << 4) | nNibbleMatchLen;
          nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
 
          if (nNumLiterals != 0) {
@@ -648,12 +648,9 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
             nNumLiterals = 0;
          }
 
+         pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
          if (nNibbleLongOffset) {
-            pOutData[nOutOffset++] = nMatchOffset & 0xff;
-            pOutData[nOutOffset++] = nMatchOffset >> 8;
-         }
-         else {
-            pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
+            pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
          }
          nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
          i += nMatchLen;
@@ -675,7 +672,7 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
       if ((nOutOffset + nTokenSize) > nMaxOutDataSize)
          return -1;
 
-      pOutData[nOutOffset++] = (nNibbleLiteralsLen << 5) | 0x01;
+      pOutData[nOutOffset++] = (nNibbleLiteralsLen << 4) | 0x0f;
       nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
 
       if (nNumLiterals != 0) {