mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-02-03 09:31:48 +00:00
Store 16-bit lengths and match offsets directly, to simplify decompression on 8-bit CPUs without affecting the compression ratio
This commit is contained in:
parent
bdc4e85948
commit
c7692cf688
17
README.md
17
README.md
@ -50,27 +50,26 @@ The stream format is composed of:
|
||||
|
||||
# Header format
|
||||
|
||||
The header contains a signature and a traits byte:
|
||||
The 3-byte header contains a signature and a traits byte:
|
||||
|
||||
0 1 2 3 4
|
||||
0x7b 0x9e 0x0f 0xd7 0x00
|
||||
0 1 2
|
||||
0x7b 0x9e 0x00
|
||||
<--- signature ---> <- traits ->
|
||||
|
||||
The traits are set to 0x00 for this version of the format.
|
||||
|
||||
# Frame format
|
||||
|
||||
Each frame contains a 3-byte length followed by block data that expands to up to 64 Kb of decompressed data.
|
||||
Each frame contains a 3-byte length followed by block data that expands to up to 64 KB of decompressed data.
|
||||
|
||||
0 1 2
|
||||
DSZ0 DSZ1 U|E|DSZ2
|
||||
DSZ0 DSZ1 U|DSZ2
|
||||
|
||||
* DSZ0 (length byte 0) contains bits 0-7 of the block data size
|
||||
* DSZ1 (length byte 1) contains bits 8-15 of the block data size
|
||||
* DSZ2 (bit 0 of length byte 2) contains bit 16 of the block data size
|
||||
* U (bit 7 of length byte 2) is set if the block data is uncompressed, and clear if the block data is compressed.
|
||||
* E (bit 6 of length byte 2) is set to mark the end of compressed data
|
||||
* Bits 1..5 of length byte 2 are currently undefined and must be set to 0 when bit 6 is cleared, and to 1 when bit 6 is set.
|
||||
* Bits 1..6 of length byte 2 are currently undefined and must be set to 0.
|
||||
|
||||
# Block data format
|
||||
|
||||
@ -90,7 +89,7 @@ The token byte is broken down into three parts:
|
||||
7 6 5 4 3 2 1 0
|
||||
O L L L M M M M
|
||||
|
||||
* O: set for a 2-byte match offset, clear for a 1-byte match offset
|
||||
* O: set for a 2-byte match offset, clear for a 1-byte match offset
|
||||
* L: 3-bit literals length (0-6, or 7 if extended). If the number of literals for this command is 0 to 6, the length is encoded in the token and no extra bytes are required. Otherwise, a value of 7 is encoded and extra bytes follow as 'optional extra literal length'
|
||||
* M: 4-bit encoded match length (0-14, or 15 if extended). Likewise, if the encoded match length for this command is 0 to 14, it is directly stored, otherwise 15 is stored and extra bytes follow as 'optional extra encoded match length'. Except for the last command in a block, a command always contains a match, so the encoded match length is the actual match length offset by the minimum, which is 3 bytes. For instance, an actual match length of 10 bytes to be copied, is encoded as 7.
|
||||
|
||||
@ -130,4 +129,4 @@ If the encoded match length is 15 or more, the 'M' bits in the token form the va
|
||||
|
||||
# Footer format
|
||||
|
||||
The stream ends with the EOD frame: the 3 length bytes are set to 0xFF, 0xFF, 0xFF, and no block data follows.
|
||||
The stream ends with the EOD frame: the 3 length bytes are set to 0x00, 0x00, 0x00, and no block data follows.
|
||||
|
@ -59,14 +59,15 @@ lzsa_decompress:
|
||||
|
||||
xor ah,ah ; Get 1-byte match offset
|
||||
lodsb
|
||||
inc ax ; the match offset is stored off-by-1, increase it
|
||||
jmp short .get_match_length
|
||||
|
||||
.get_long_offset:
|
||||
lodsw ; Get 2-byte match offset
|
||||
test ax,ax
|
||||
je short .done_decompressing ; bail if we hit EOD
|
||||
|
||||
.get_match_length:
|
||||
inc ax ; the match offset is stored off-by-1, increase it
|
||||
je short .done_decompressing ; bail if we hit EOD
|
||||
xchg ax,dx ; dx: match offset ax: original token
|
||||
and al,0FH ; isolate match length in token (MMMM)
|
||||
|
||||
@ -97,20 +98,19 @@ lzsa_decompress:
|
||||
|
||||
.get_varlen:
|
||||
lodsb ; grab extra length byte
|
||||
add cx,ax ; add extra length byte to length from token
|
||||
|
||||
cmp al,0FFH ; 3-byte extra length?
|
||||
je .large_varlen ; yes, go grab it
|
||||
|
||||
add cx,ax ; add extra length byte to length from token
|
||||
cmp al,0FEH ; 2-byte extra length?
|
||||
jne .varlen_done ; no, we have the full length now, bail
|
||||
|
||||
lodsb ; grab extra length byte
|
||||
jmp short .add_and_varlen_done ; go add it and bail
|
||||
add cx,ax ; add to length from token
|
||||
.varlen_done:
|
||||
ret ; bail
|
||||
|
||||
.large_varlen:
|
||||
lodsw ; grab 16-bit extra length
|
||||
.add_and_varlen_done:
|
||||
add cx,ax ; add to length from token
|
||||
.varlen_done:
|
||||
ret
|
||||
|
26
src/expand.c
26
src/expand.c
@ -48,12 +48,9 @@ static inline FORCE_INLINE int lzsa_expand_literals_slow(const unsigned char **p
|
||||
nLiterals += (int)((unsigned int)*pInBlock++);
|
||||
}
|
||||
else if (nByte == 255) {
|
||||
int nLargeLiterals;
|
||||
|
||||
if ((pInBlock + 1) >= pInBlockEnd) return -1;
|
||||
nLargeLiterals = ((unsigned int)*pInBlock++);
|
||||
nLargeLiterals |= (((unsigned int)*pInBlock++) << 8);
|
||||
nLiterals += nLargeLiterals;
|
||||
nLiterals = ((unsigned int)*pInBlock++);
|
||||
nLiterals |= (((unsigned int)*pInBlock++) << 8);
|
||||
}
|
||||
}
|
||||
|
||||
@ -89,12 +86,9 @@ static inline FORCE_INLINE int lzsa_expand_match_slow(const unsigned char **ppIn
|
||||
nMatchLen += (int)((unsigned int)*pInBlock++);
|
||||
}
|
||||
else if (nByte == 255) {
|
||||
int nLargeMatchLen;
|
||||
|
||||
if ((pInBlock + 1) >= pInBlockEnd) return -1;
|
||||
nLargeMatchLen = ((unsigned int)*pInBlock++);
|
||||
nLargeMatchLen |= (((unsigned int)*pInBlock++) << 8);
|
||||
nMatchLen += nLargeMatchLen;
|
||||
nMatchLen = ((unsigned int)*pInBlock++);
|
||||
nMatchLen |= (((unsigned int)*pInBlock++) << 8);
|
||||
}
|
||||
}
|
||||
|
||||
@ -193,9 +187,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
|
||||
if (token & 0x80) {
|
||||
if (pInBlock >= pInBlockEnd) return -1;
|
||||
nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
|
||||
if (nMatchOffset == 0) break;
|
||||
}
|
||||
else {
|
||||
nMatchOffset++;
|
||||
}
|
||||
if (nMatchOffset == 0xffff) break;
|
||||
nMatchOffset++;
|
||||
|
||||
const unsigned char *pSrc = pCurOutData - nMatchOffset;
|
||||
if (pSrc < pOutData)
|
||||
@ -229,9 +225,11 @@ int lzsa_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned ch
|
||||
if (token & 0x80) {
|
||||
if (pInBlock >= pInBlockEnd) return -1;
|
||||
nMatchOffset |= (((unsigned int)*pInBlock++) << 8);
|
||||
if (nMatchOffset == 0) break;
|
||||
}
|
||||
else {
|
||||
nMatchOffset++;
|
||||
}
|
||||
if (nMatchOffset == 0xffff) break;
|
||||
nMatchOffset++;
|
||||
|
||||
const unsigned char *pSrc = pCurOutData - nMatchOffset;
|
||||
if (pSrc < pOutData)
|
||||
|
18
src/main.c
18
src/main.c
@ -225,9 +225,9 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
|
||||
unsigned char cFooter[3];
|
||||
|
||||
cFooter[0] = 0xFF; /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
|
||||
cFooter[1] = 0xFF;
|
||||
cFooter[2] = 0xFF;
|
||||
cFooter[0] = 0x00; /* EOD frame (written even in raw mode, so that the end of the data can be detected) */
|
||||
cFooter[1] = 0x00;
|
||||
cFooter[2] = 0x00;
|
||||
|
||||
if (!bError)
|
||||
bError = fwrite(cFooter, 1, 3, f_out) != 3;
|
||||
@ -370,15 +370,15 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
|
||||
(((unsigned int)cBlockSize[2]) << 16);
|
||||
}
|
||||
else {
|
||||
nBlockSize = 0xffffff;
|
||||
nBlockSize = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
nBlockSize = nFileSize - 3;
|
||||
nFileSize = 0xffffff;
|
||||
nFileSize = 0;
|
||||
}
|
||||
|
||||
if ((nBlockSize & 0x400000) == 0) {
|
||||
if (nBlockSize != 0) {
|
||||
bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
|
||||
int nDecompressedSize = 0;
|
||||
|
||||
@ -570,15 +570,15 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
|
||||
(((unsigned int)cBlockSize[2]) << 16);
|
||||
}
|
||||
else {
|
||||
nBlockSize = 0xffffff;
|
||||
nBlockSize = 0;
|
||||
}
|
||||
}
|
||||
else {
|
||||
nBlockSize = nFileSize - 3;
|
||||
nFileSize = 0xffffff;
|
||||
nFileSize = 0;
|
||||
}
|
||||
|
||||
if ((nBlockSize & 0x400000) == 0) {
|
||||
if (nBlockSize != 0) {
|
||||
bool bIsUncompressed = (nBlockSize & 0x800000) != 0;
|
||||
int nDecompressedSize = 0;
|
||||
|
||||
|
39
src/shrink.c
39
src/shrink.c
@ -410,19 +410,17 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
|
||||
*/
|
||||
static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
|
||||
if (nLength >= LITERALS_RUN_LEN) {
|
||||
nLength -= LITERALS_RUN_LEN;
|
||||
|
||||
if (nLength < 254)
|
||||
pOutData[nOutOffset++] = nLength;
|
||||
if (nLength < (LITERALS_RUN_LEN + 254))
|
||||
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN;
|
||||
else {
|
||||
if (nLength < 510) {
|
||||
if (nLength < (LITERALS_RUN_LEN + 510)) {
|
||||
pOutData[nOutOffset++] = 254;
|
||||
pOutData[nOutOffset++] = nLength - 254;
|
||||
pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN - 254;
|
||||
}
|
||||
else {
|
||||
pOutData[nOutOffset++] = 255;
|
||||
pOutData[nOutOffset++] = (nLength - 255) & 0xff;
|
||||
pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
|
||||
pOutData[nOutOffset++] = nLength & 0xff;
|
||||
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -463,19 +461,17 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
|
||||
*/
|
||||
static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffset, int nLength) {
|
||||
if (nLength >= MATCH_RUN_LEN) {
|
||||
nLength -= MATCH_RUN_LEN;
|
||||
|
||||
if (nLength < 254)
|
||||
pOutData[nOutOffset++] = nLength;
|
||||
if (nLength < (MATCH_RUN_LEN + 254))
|
||||
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN;
|
||||
else {
|
||||
if (nLength < 510) {
|
||||
if (nLength < (MATCH_RUN_LEN + 510)) {
|
||||
pOutData[nOutOffset++] = 254;
|
||||
pOutData[nOutOffset++] = nLength - 254;
|
||||
pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN - 254;
|
||||
}
|
||||
else {
|
||||
pOutData[nOutOffset++] = 255;
|
||||
pOutData[nOutOffset++] = (nLength - 255) & 0xff;
|
||||
pOutData[nOutOffset++] = ((nLength - 255) >> 8) & 0xff;
|
||||
pOutData[nOutOffset++] = nLength & 0xff;
|
||||
pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -606,10 +602,13 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
||||
nNumLiterals = 0;
|
||||
}
|
||||
|
||||
pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
|
||||
if (nNibbleLongOffset)
|
||||
pOutData[nOutOffset++] = (nMatchOffset - 1) >> 8;
|
||||
|
||||
if (nNibbleLongOffset) {
|
||||
pOutData[nOutOffset++] = nMatchOffset & 0xff;
|
||||
pOutData[nOutOffset++] = nMatchOffset >> 8;
|
||||
}
|
||||
else {
|
||||
pOutData[nOutOffset++] = (nMatchOffset - 1) & 0xff;
|
||||
}
|
||||
nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen);
|
||||
i += nMatchLen;
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user