diff --git a/README.md b/README.md
index 3a391ea..403e531 100755
--- a/README.md
+++ b/README.md
@@ -50,27 +50,26 @@ The stream format is composed of:
 
 # Header format
 
-The header contains a signature and a traits byte:
+The 3-byte header contains a signature and a traits byte:
 
-    0    1    2    3      4
-    0x7b 0x9e 0x0f 0xd7   0x00
+    0    1                2
+    0x7b 0x9e             0x00
     <--- signature --->   <- traits ->
 
 The traits are set to 0x00 for this version of the format.
 
 # Frame format
 
-Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data.
+Each frame contains a 3-byte length followed by block data that expands to up to 64 KB of decompressed data.
 
     0    1    2
-    DSZ0 DSZ1 U|E|DSZ2
+    DSZ0 DSZ1 U|DSZ2
 
 * DSZ0 (length byte 0) contains bits 0-7 of the block data size
 * DSZ1 (length byte 1) contains bits 8-15 of the block data size
 * DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
 * U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
-* E (bit 6 of length byte 2) is set to mark the end of compressed data
-* Bits 1..5 of length byte 2 are currently undefined and must be set to 0 when bit 6 is cleared, and to 1 when bit 6 is set.
+* Bits 1..6 of length byte 2 are currently undefined and must be set to 0.
 
 # Block data format
 
@@ -90,7 +89,7 @@ The token byte is broken down into three parts:
     7 6 5 4 3 2 1 0
     O L L L M M M M
 
-* O: set for a 2-byte match offset, clear for a 1-byte match offset
+* O: set for a 2-byte match offset, clear for a 1-byte match offset
 * L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
 * M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.
 
@@ -130,4 +129,4 @@ If the encoded match length is 15 or more, the 'M' bits in the token form the va
 
 # Footer format
 
-The stream ends with the EOD frame: the 3 length bytes are set to 0xFF, 0xFF, 0xFF, and no block data follows.
+The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
diff --git a/asm/8088/decompress_small.S b/asm/8088/decompress_small.S
index 5f3453c..40fdb9e 100755
--- a/asm/8088/decompress_small.S
+++ b/asm/8088/decompress_small.S
@@ -59,14 +59,15 @@ lzsa_decompress:
 
    xor ah,ah               ; Get 1-byte match offset
    lodsb
+   inc ax                  ; the match offset is stored off-by-1, increase it
    jmp short .get_match_length
 
 .get_long_offset:
    lodsw                   ; Get 2-byte match offset
+   test ax,ax
+   je short .done_decompressing ; bail if we hit EOD
 
 .get_match_length:
-   inc ax                  ; the match offset is stored off-by-1, increase it
-   je short .done_decompressing ; bail if we hit EOD
    xchg ax,dx              ; dx: match offset  ax: original token
    and al,0FH              ; isolate match length in token (MMMM)
 
@@ -97,20 +98,19 @@ lzsa_decompress:
 
 .get_varlen:
    lodsb                   ; grab extra length byte
-   add cx,ax               ; add extra length byte to length from token
 
    cmp al,0FFH             ; 3-byte extra length?
    je .large_varlen        ; yes, go grab it
 
+   add cx,ax               ; add extra length byte to length from token
    cmp al,0FEH             ; 2-byte extra length?
    jne .varlen_done        ; no, we have the full length now, bail
 
    lodsb                   ; grab extra length byte
-   jmp short .add_and_varlen_done ; go add it and bail
+   add cx,ax               ; add to length from token
+.varlen_done:
+   ret                     ; bail
 
 .large_varlen:
    lodsw                   ; grab 16-bit extra length
-.add_and_varlen_done:
-   add cx,ax               ; add to length from token
-.varlen_done:
    ret
diff --git a/src/expand.c b/src/expand.c
index 6f0121c..4d4d8a1 100755
--- a/src/expand.c
+++ b/src/expand.c
@@ -48,12 +48,9 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
          nLiterals += (int)((unsigned int)*pInBlock++);
       }
       else if (nByte == 255) {
-         int nLargeLiterals;
-
          if ((pInBlock + 1) >= pInBlockEnd) return -1;
-         nLargeLiterals = ((unsigned int)*pInBlock++);
-         nLargeLiterals |= (((unsigned int)*pInBlock++) << 8);
-         nLiterals += nLargeLiterals;
+         nLiterals = ((unsigned int)*pInBlock++);
+         nLiterals |= (((unsigned int)*pInBlock++) << 8);
       }
    }
 
@@ -89,12 +86,9 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
          nMatchLen += (int)((unsigned int)*pInBlock++);
       }
       else if (nByte == 255) {
-         int nLargeMatchLen;
-
          if ((pInBlock + 1) >= pInBlockEnd) return -1;
-         nLargeMatchLen = ((unsigned int)*pInBlock++);
-         nLargeMatchLen |= (((unsigned int)*pInBlock++) << 8);
-         nMatchLen += nLargeMatchLen;
+         nMatchLen = ((unsigned int)*pInBlock++);
+         nMatchLen |= (((unsigned int)*pInBlock++) << 8);
       }
    }
 
@@ -193,9 +187,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
          if (token & 0x80) {
             if (pInBlock >= pInBlockEnd) return -1;
             nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            if (nMatchOffset == 0) break;
+         }
+         else {
+            nMatchOffset++;
          }
-         if (nMatchOffset == 0xffff) break;
-         nMatchOffset++;
 
          const unsigned char *pSrc = pCurOutData - nMatchOffset;
          if (pSrc < pOutData)
@@ -229,9 +225,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
          if (token & 0x80) {
             if (pInBlock >= pInBlockEnd) return -1;
             nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
+            if (nMatchOffset == 0) break;
+         }
+         else {
+            nMatchOffset++;
          }
-         if (nMatchOffset == 0xffff) break;
-         nMatchOffset++;
 
          const unsigned char *pSrc = pCurOutData - nMatchOffset;
          if (pSrc < pOutData)
diff --git a/src/main.c b/src/main.c
index cb87572..9c7df55 100755
--- a/src/main.c
+++ b/src/main.c
@@ -225,9 +225,9 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
 
    unsigned char cFooter[3];
 
-   cFooter[0] = 0xFF;         /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
-   cFooter[1] = 0xFF;
-   cFooter[2] = 0xFF;
+   cFooter[0] = 0x00;         /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
+   cFooter[1] = 0x00;
+   cFooter[2] = 0x00;
 
    if (!bError)
       bError = fwrite(cFooter, 1, 3, f_out) != 3;
@@ -370,15 +370,15 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
                (((unsigned int)cBlockSize[2]) << 16);
          }
          else {
-            nBlockSize = 0xffffff;
+            nBlockSize = 0;
          }
       }
       else {
          nBlockSize = nFileSize - 3;
-         nFileSize = 0xffffff;
+         nFileSize = 0;
       }
 
-      if ((nBlockSize & 0x400000) == 0) {
+      if (nBlockSize != 0) {
          bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
          int nDecompressedSize = 0;
 
@@ -570,15 +570,15 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
                (((unsigned int)cBlockSize[2]) << 16);
          }
          else {
-            nBlockSize = 0xffffff;
+            nBlockSize = 0;
          }
       }
       else {
          nBlockSize = nFileSize - 3;
-         nFileSize = 0xffffff;
+         nFileSize = 0;
       }
 
-      if ((nBlockSize & 0x400000) == 0) {
+      if (nBlockSize != 0) {
          bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
          int nDecompressedSize = 0;
 
diff --git a/src/shrink.c b/src/shrink.c
index 364a41f..d2d9673 100755
--- a/src/shrink.c
+++ b/src/shrink.c
@@ -410,19 +410,17 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
  */
 static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
    if (nLength >= LITERALS_RUN_LEN) {
-      nLength -= LITERALS_RUN_LEN;
-
-      if (nLength < 254)
-         pOutData[nOutOffset++] = nLength;
+      if (nLength < (LITERALS_RUN_LEN + 254))
+         pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
       else {
-         if (nLength < 510) {
+         if (nLength < (LITERALS_RUN_LEN + 510)) {
             pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - 254;
+            pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254;
          }
          else {
             pOutData[nOutOffset++] = 255;
-            pOutData[nOutOffset++] = (nLength - 255) & 0xff;
-            pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
          }
       }
    }
@@ -463,19 +461,17 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
  */
 static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
    if (nLength >= MATCH_RUN_LEN) {
-      nLength -= MATCH_RUN_LEN;
-
-      if (nLength < 254)
-         pOutData[nOutOffset++] = nLength;
+      if (nLength < (MATCH_RUN_LEN + 254))
+         pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
       else {
-         if (nLength < 510) {
+         if (nLength < (MATCH_RUN_LEN + 510)) {
             pOutData[nOutOffset++] = 254;
-            pOutData[nOutOffset++] = nLength - 254;
+            pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254;
          }
          else {
             pOutData[nOutOffset++] = 255;
-            pOutData[nOutOffset++] = (nLength - 255) & 0xff;
-            pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
+            pOutData[nOutOffset++] = nLength & 0xff;
+            pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
          }
       }
    }
@@ -606,10 +602,13 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
             nNumLiterals = 0;
          }
 
-         pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
-         if (nNibbleLongOffset)
-            pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
-
+         if (nNibbleLongOffset) {
+            pOutData[nOutOffset++] = nMatchOffset & 0xff;
+            pOutData[nOutOffset++] = nMatchOffset >> 8;
+         }
+         else {
+            pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
+         }
          nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
          i += nMatchLen;
       }