Store 16-bit lengths and match offsets directly, to simplify decompression on 8-bit CPUs without affecting the compression ratio

2025-02-03 09:31:48 +00:00 · 2019-04-05 10:42:06 +02:00 · 2019-04-05 10:42:06 +02:00 · c7692cf688
commit c7692cf688
parent bdc4e85948
5 changed files with 55 additions and 59 deletions
--- a/README.md
+++ b/README.md
@ -50,27 +50,26 @@ The stream format is composed of:

 # Header format

-The header contains a signature and a traits byte:
+The 3-byte header contains a signature and a traits byte:

-    0    1    2    3      4
-    0x7b 0x9e 0x0f 0xd7   0x00
+    0    1                2
+    0x7b 0x9e             0x00
    <--- signature --->   <- traits ->

 The traits are set to 0x00 for this version of the format.

 # Frame format

-Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data.
+Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data.

    0    1    2
-    DSZ0 DSZ1 U|E|DSZ2
+    DSZ0 DSZ1 U|DSZ2

 * DSZ0 (length byte 0) contains bits 0-7 of the block data size
 * DSZ1 (length byte 1) contains bits 8-15 of the block data size
 * DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
 * U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
-* E (bit 6 of length byte 2) is set to mark the end of compressed data
-* Bits 1..5 of length byte 2 are currently undefined and must be set to 0 when bit 6 is cleared, and to 1 when bit 6 is set.
+* Bits 1..6 of length byte 2 are currently undefined and must be set to 0.

 # Block data format

@ -90,7 +89,7 @@ The token byte is broken down into three parts:
    7 6 5 4 3 2 1 0
    O L L L M M M M

-* O: set for a 2-byte match offset, clear for a 1-byte match offset
+* O: set for a 2-byte match offset, clear for a 1-byte match offset
 * L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
 * M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.

@ -130,4 +129,4 @@ If the encoded match length is 15 or more, the 'M' bits in the token form the va

 # Footer format

-The stream ends with the EOD frame: the 3 length bytes are set to 0xFF, 0xFF, 0xFF, and no block data follows.
+The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
--- a/asm/8088/decompress_small.S
+++ b/asm/8088/decompress_small.S
@ -59,14 +59,15 @@ lzsa_decompress:

   xor ah,ah               ; Get 1-byte match offset
   lodsb
+   inc ax                  ; the match offset is stored off-by-1, increase it
   jmp short .get_match_length

 .get_long_offset:
   lodsw                   ; Get 2-byte match offset
+   test ax,ax
+   je short .done_decompressing ; bail if we hit EOD

 .get_match_length:
-   inc ax                  ; the match offset is stored off-by-1, increase it
-   je short .done_decompressing ; bail if we hit EOD
   xchg ax,dx              ; dx: match offset  ax: original token
   and al,0FH              ; isolate match length in token (MMMM)

@ -97,20 +98,19 @@ lzsa_decompress:

 .get_varlen:
   lodsb                   ; grab extra length byte
-   add cx,ax               ; add extra length byte to length from token

   cmp al,0FFH             ; 3-byte extra length?
   je .large_varlen        ; yes, go grab it

+   add cx,ax               ; add extra length byte to length from token
   cmp al,0FEH             ; 2-byte extra length?
   jne .varlen_done        ; no, we have the full length now, bail

   lodsb                   ; grab extra length byte
-   jmp short .add_and_varlen_done ; go add it and bail
+   add cx,ax               ; add to length from token
+.varlen_done:
+   ret                     ; bail

 .large_varlen:
   lodsw                   ; grab 16-bit extra length
-.add_and_varlen_done:
-   add cx,ax               ; add to length from token
-.varlen_done:
   ret
--- a/src/expand.c
+++ b/src/expand.c
@ -48,12 +48,9 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
         nLiterals += (int)((unsigned int)*pInBlock++);
      }
      else if (nByte == 255) {
-         int nLargeLiterals;
-
         if ((pInBlock + 1) >= pInBlockEnd) return -1;
-         nLargeLiterals = ((unsigned int)*pInBlock++);
-         nLargeLiterals |= (((unsigned int)*pInBlock++) << 8);
-         nLiterals += nLargeLiterals;
+         nLiterals = ((unsigned int)*pInBlock++);
+         nLiterals |= (((unsigned int)*pInBlock++) << 8);
      }
   }

@ -89,12 +86,9 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
         nMatchLen += (int)((unsigned int)*pInBlock++);
      }
      else if (nByte == 255) {
-         int nLargeMatchLen;
-
         if ((pInBlock + 1) >= pInBlockEnd) return -1;
-         nLargeMatchLen = ((unsigned int)*pInBlock++);
-         nLargeMatchLen |= (((unsigned int)*pInBlock++) << 8);
-         nMatchLen += nLargeMatchLen;
+         nMatchLen = ((unsigned int)*pInBlock++);
+         nMatchLen |= (((unsigned int)*pInBlock++) << 8);
      }
   }

@ -193,9 +187,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
         if (token & 0x80) {
            if (pInBlock >= pInBlockEnd) return -1;
            nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            if (nMatchOffset == 0) break;
+         }
+         else {
+            nMatchOffset++;
         }
-         if (nMatchOffset == 0xffff) break;
-         nMatchOffset++;

         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         if (pSrc < pOutData)
@ -229,9 +225,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
         if (token & 0x80) {
            if (pInBlock >= pInBlockEnd) return -1;
            nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            if (nMatchOffset == 0) break;
+         }
+         else {
+            nMatchOffset++;
         }
-         if (nMatchOffset == 0xffff) break;
-         nMatchOffset++;

         const unsigned char *pSrc = pCurOutData - nMatchOffset;
         if (pSrc < pOutData)
--- a/src/main.c
+++ b/src/main.c
@ -225,9 +225,9 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,

   unsigned char cFooter[3];

-   cFooter[0] = 0xFF;         /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
-   cFooter[1] = 0xFF;
-   cFooter[2] = 0xFF;
+   cFooter[0] = 0x00;         /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
+   cFooter[1] = 0x00;
+   cFooter[2] = 0x00;

   if (!bError)
      bError = fwrite(cFooter, 1, 3, f_out) != 3;
@ -370,15 +370,15 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
               (((unsigned int)cBlockSize[2]) << 16);
         }
         else {
-            nBlockSize = 0xffffff;
+            nBlockSize = 0;
         }
      }
      else {
         nBlockSize = nFileSize - 3;
-         nFileSize = 0xffffff;
+         nFileSize = 0;
      }

-      if ((nBlockSize & 0x400000) == 0) {
+      if (nBlockSize != 0) {
         bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
         int nDecompressedSize = 0;

@ -570,15 +570,15 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
               (((unsigned int)cBlockSize[2]) << 16);
         }
         else {
-            nBlockSize = 0xffffff;
+            nBlockSize = 0;
         }
      }
      else {
         nBlockSize = nFileSize - 3;
-         nFileSize = 0xffffff;
+         nFileSize = 0;
      }

-      if ((nBlockSize & 0x400000) == 0) {
+      if (nBlockSize != 0) {
         bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
         int nDecompressedSize = 0;

--- a/src/shrink.c
+++ b/src/shrink.c
@ -410,19 +410,17 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
 */
 static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength >= LITERALS_RUN_LEN) {
-      nLength -= LITERALS_RUN_LEN;
-
-      if (nLength < 254)
-         pOutData[nOutOffset++] = nLength;
+      if (nLength < (LITERALS_RUN_LEN + 254))
+         pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
      else {
-         if (nLength < 510) {
+         if (nLength < (LITERALS_RUN_LEN + 510)) {
            pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - 254;
+            pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254;
         }
         else {
            pOutData[nOutOffset++] = 255;
-            pOutData[nOutOffset++] = (nLength - 255) & 0xff;
-            pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
         }
      }
   }
@ -463,19 +461,17 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
 */
 static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
   if (nLength >= MATCH_RUN_LEN) {
-      nLength -= MATCH_RUN_LEN;
-
-      if (nLength < 254)
-         pOutData[nOutOffset++] = nLength;
+      if (nLength < (MATCH_RUN_LEN + 254))
+         pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
      else {
-         if (nLength < 510) {
+         if (nLength < (MATCH_RUN_LEN + 510)) {
            pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - 254;
+            pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254;
         }
         else {
            pOutData[nOutOffset++] = 255;
-            pOutData[nOutOffset++] = (nLength - 255) & 0xff;
-            pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
         }
      }
   }
@ -606,10 +602,13 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
            nNumLiterals = 0;
         }

-         pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
-         if (nNibbleLongOffset)
-            pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
-
+         if (nNibbleLongOffset) {
+            pOutData[nOutOffset++] = nMatchOffset & 0xff;
+            pOutData[nOutOffset++] = nMatchOffset >> 8;
+         }
+         else {
+            pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
+         }
         nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
         i += nMatchLen;
      }