diff --git a/README.md b/README.md index 3a391ea..403e531 100755 --- a/README.md +++ b/README.md @@ -50,27 +50,26 @@ The stream format is composed of: # Header format -The header contains a signature and a traits byte: +The 3-bytes header contains a signature and a traits byte: - 0 1 2 3 4 - 0x7b 0x9e 0x0f 0xd7 0x00 + 0 1 2 + 0x7b 0x9e 0x00 <--- signature ---> <- traits -> The traits are set to 0x00 for this version of the format. # Frame format -Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data. +Each frame contains a 3-bytes length followed by block data that expands to up to 64 Kb of decompressed data. 0 1 2 - DSZ0 DSZ1 U|E|DSZ2 + DSZ0 DSZ1 U|DSZ2 * DSZ0 (length byte 0) contains bits 0-7 of the block data size * DSZ1 (length byte 1) contains bits 8-15 of the block data size * DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size * U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed. -* E (bit 6 of length byte 2) is set to mark the end of compressed data -* Bits 1..5 of length byte 2 are currently undefined and must be set to 0 when bit 6 is cleared, and to 1 when bit 6 is set. +* Bits 1..6 of length byte 2 are currently undefined and must be set to 0. # Block data format @@ -90,7 +89,7 @@ The token byte is broken down into three parts: 7 6 5 4 3 2 1 0 O L L L M M M M -* O: set for a 2-byte match offset, clear for a 1-byte match offset +* O: set for a 2-bytes match offset, clear for a 1-byte match offset * L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length' * M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7. @@ -130,4 +129,4 @@ If the encoded match length is 15 or more, the 'M' bits in the token form the va # Footer format -The stream ends with the EOD frame: the 3 length bytes are set to 0xFF, 0xFF, 0xFF, and no block data follows. +The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows. diff --git a/asm/8088/decompress_small.S b/asm/8088/decompress_small.S index 5f3453c..40fdb9e 100755 --- a/asm/8088/decompress_small.S +++ b/asm/8088/decompress_small.S @@ -59,14 +59,15 @@ lzsa_decompress: xor ah,ah ; Get 1-byte match offset lodsb + inc ax ; the match offset is stored off-by-1, increase it jmp short .get_match_length .get_long_offset: lodsw ; Get 2-byte match offset + test ax,ax + je short .done_decompressing ; bail if we hit EOD .get_match_length: - inc ax ; the match offset is stored off-by-1, increase it - je short .done_decompressing ; bail if we hit EOD xchg ax,dx ; dx: match offset ax: original token and al,0FH ; isolate match length in token (MMMM) @@ -97,20 +98,19 @@ lzsa_decompress: .get_varlen: lodsb ; grab extra length byte - add cx,ax ; add extra length byte to length from token cmp al,0FFH ; 3-byte extra length? je .large_varlen ; yes, go grab it + add cx,ax ; add extra length byte to length from token cmp al,0FEH ; 2-byte extra length? jne .varlen_done ; no, we have the full length now, bail lodsb ; grab extra length byte - jmp short .add_and_varlen_done ; go add it and bail + add cx,ax ; add to length from token +.varlen_done: + ret ; bail .large_varlen: lodsw ; grab 16-bit extra length -.add_and_varlen_done: - add cx,ax ; add to length from token -.varlen_done: ret diff --git a/src/expand.c b/src/expand.c index 6f0121c..4d4d8a1 100755 --- a/src/expand.c +++ b/src/expand.c @@ -48,12 +48,9 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p nLiterals += (int)((unsigned int)*pInBlock++); } else if (nByte == 255) { - int nLargeLiterals; - if ((pInBlock + 1) >= pInBlockEnd) return -1; - nLargeLiterals = ((unsigned int)*pInBlock++); - nLargeLiterals |= (((unsigned int)*pInBlock++) << 8); - nLiterals += nLargeLiterals; + nLiterals = ((unsigned int)*pInBlock++); + nLiterals |= (((unsigned int)*pInBlock++) << 8); } } @@ -89,12 +86,9 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn nMatchLen += (int)((unsigned int)*pInBlock++); } else if (nByte == 255) { - int nLargeMatchLen; - if ((pInBlock + 1) >= pInBlockEnd) return -1; - nLargeMatchLen = ((unsigned int)*pInBlock++); - nLargeMatchLen |= (((unsigned int)*pInBlock++) << 8); - nMatchLen += nLargeMatchLen; + nMatchLen = ((unsigned int)*pInBlock++); + nMatchLen |= (((unsigned int)*pInBlock++) << 8); } } @@ -193,9 +187,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch if (token & 0x80) { if (pInBlock >= pInBlockEnd) return -1; nMatchOffset |= (((unsigned int)*pInBlock++) << 8); + if (nMatchOffset == 0) break; + } + else { + nMatchOffset++; } - if (nMatchOffset == 0xffff) break; - nMatchOffset++; const unsigned char *pSrc = pCurOutData - nMatchOffset; if (pSrc < pOutData) @@ -229,9 +225,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch if (token & 0x80) { if (pInBlock >= pInBlockEnd) return -1; nMatchOffset |= (((unsigned int)*pInBlock++) << 8); + if (nMatchOffset == 0) break; + } + else { + nMatchOffset++; } - if (nMatchOffset == 0xffff) break; - nMatchOffset++; const unsigned char *pSrc = pCurOutData - nMatchOffset; if (pSrc < pOutData) diff --git a/src/main.c b/src/main.c index cb87572..9c7df55 100755 --- a/src/main.c +++ b/src/main.c @@ -225,9 +225,9 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, unsigned char cFooter[3]; - cFooter[0] = 0xFF; /* EOD frame (written even in raw mode, so that the end of the data can be detected) */ - cFooter[1] = 0xFF; - cFooter[2] = 0xFF; + cFooter[0] = 0x00; /* EOD frame (written even in raw mode, so that the end of the data can be detected) */ + cFooter[1] = 0x00; + cFooter[2] = 0x00; if (!bError) bError = fwrite(cFooter, 1, 3, f_out) != 3; @@ -370,15 +370,15 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename (((unsigned int)cBlockSize[2]) << 16); } else { - nBlockSize = 0xffffff; + nBlockSize = 0; } } else { nBlockSize = nFileSize - 3; - nFileSize = 0xffffff; + nFileSize = 0; } - if ((nBlockSize & 0x400000) == 0) { + if (nBlockSize != 0) { bool bIsUncompressed = (nBlockSize & 0x800000) != 0; int nDecompressedSize = 0; @@ -570,15 +570,15 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c (((unsigned int)cBlockSize[2]) << 16); } else { - nBlockSize = 0xffffff; + nBlockSize = 0; } } else { nBlockSize = nFileSize - 3; - nFileSize = 0xffffff; + nFileSize = 0; } - if ((nBlockSize & 0x400000) == 0) { + if (nBlockSize != 0) { bool bIsUncompressed = (nBlockSize & 0x800000) != 0; int nDecompressedSize = 0; diff --git a/src/shrink.c b/src/shrink.c index 364a41f..d2d9673 100755 --- a/src/shrink.c +++ b/src/shrink.c @@ -410,19 +410,17 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) { */ static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) { if (nLength >= LITERALS_RUN_LEN) { - nLength -= LITERALS_RUN_LEN; - - if (nLength < 254) - pOutData[nOutOffset++] = nLength; + if (nLength < (LITERALS_RUN_LEN + 254)) + pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN; else { - if (nLength < 510) { + if (nLength < (LITERALS_RUN_LEN + 510)) { pOutData[nOutOffset++] = 254; - pOutData[nOutOffset++] = nLength - 254; + pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254; } else { pOutData[nOutOffset++] = 255; - pOutData[nOutOffset++] = (nLength - 255) & 0xff; - pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff; + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; } } } @@ -463,19 +461,17 @@ static inline int lzsa_get_match_varlen_size(const int nLength) { */ static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) { if (nLength >= MATCH_RUN_LEN) { - nLength -= MATCH_RUN_LEN; - - if (nLength < 254) - pOutData[nOutOffset++] = nLength; + if (nLength < (MATCH_RUN_LEN + 254)) + pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN; else { - if (nLength < 510) { + if (nLength < (MATCH_RUN_LEN + 510)) { pOutData[nOutOffset++] = 254; - pOutData[nOutOffset++] = nLength - 254; + pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254; } else { pOutData[nOutOffset++] = 255; - pOutData[nOutOffset++] = (nLength - 255) & 0xff; - pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff; + pOutData[nOutOffset++] = nLength & 0xff; + pOutData[nOutOffset++] = (nLength >> 8) & 0xff; } } } @@ -606,10 +602,13 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p nNumLiterals = 0; } - pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff; - if (nNibbleLongOffset) - pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8; - + if (nNibbleLongOffset) { + pOutData[nOutOffset++] = nMatchOffset & 0xff; + pOutData[nOutOffset++] = nMatchOffset >> 8; + } + else { + pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff; + } nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen); i += nMatchLen; }