mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-02-22 13:28:57 +00:00
Further update LZSA2 format; avoid name conflicts
This commit is contained in:
parent
272f2e7a29
commit
79ed7bf91e
4
Makefile
4
Makefile
@ -1,5 +1,5 @@
|
|||||||
CC=clang
|
CC=clang
|
||||||
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc -DHAVE_CONFIG_H
|
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
|
||||||
OBJDIR=obj
|
OBJDIR=obj
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
STRIP=strip
|
STRIP=strip
|
||||||
@ -26,9 +26,9 @@ OBJS += $(OBJDIR)/src/shrink_inmem.o
|
|||||||
OBJS += $(OBJDIR)/src/shrink_streaming.o
|
OBJS += $(OBJDIR)/src/shrink_streaming.o
|
||||||
OBJS += $(OBJDIR)/src/stream.o
|
OBJS += $(OBJDIR)/src/stream.o
|
||||||
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
|
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
|
||||||
|
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
|
||||||
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
|
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
|
||||||
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
|
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
|
||||||
OBJS += $(OBJDIR)/src/libdivsufsort/lib/utils.o
|
|
||||||
|
|
||||||
all: $(APP)
|
all: $(APP)
|
||||||
|
|
||||||
|
@ -50,7 +50,7 @@ DECODE_TOKEN
|
|||||||
BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token
|
BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token
|
||||||
|
|
||||||
JSR GETNIBBLE ; get extra literals length nibble
|
JSR GETNIBBLE ; get extra literals length nibble
|
||||||
CLC ; add nibble to len from token
|
; add nibble to len from token
|
||||||
ADC #$03 ; (LITERALS_RUN_LEN_V2)
|
ADC #$03 ; (LITERALS_RUN_LEN_V2)
|
||||||
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
|
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
|
||||||
BNE PREPARE_COPY_LITERALS ; if less, literals count is complete
|
BNE PREPARE_COPY_LITERALS ; if less, literals count is complete
|
||||||
@ -92,24 +92,20 @@ NO_LITERALS
|
|||||||
BMI OFFSET_9_BIT
|
BMI OFFSET_9_BIT
|
||||||
|
|
||||||
; 00Z: 5 bit offset
|
; 00Z: 5 bit offset
|
||||||
LSR A ; Shift Z (offset bit 4) in place
|
|
||||||
LSR A
|
|
||||||
AND #$10
|
|
||||||
STA FIXUP
|
|
||||||
|
|
||||||
LDA #$0FF ; set offset bits 15-8 to 1
|
LDX #$0FF ; set offset bits 15-8 to 1
|
||||||
STA OFFSHI
|
STX OFFSHI
|
||||||
|
|
||||||
JSR GETNIBBLE ; get nibble for offset bits 0-3
|
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
|
||||||
ORA FIXUP ; merge offset bit 4
|
ORA #$E0 ; set bits 7-5 to 1
|
||||||
ORA #$E0 ; set offset bits 7-5 to 1
|
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
|
||||||
JMP GOT_OFFSET_LO ; go store low byte of match offset and prepare match
|
|
||||||
|
|
||||||
OFFSET_9_BIT ; 01Z: 9 bit offset
|
OFFSET_9_BIT ; 01Z: 9 bit offset
|
||||||
ASL ; shift Z (offset bit 8) in place
|
ASL ; shift Z (offset bit 8) in place
|
||||||
ROL
|
ROL
|
||||||
ROL
|
ROL
|
||||||
ORA #$FE ; set offset bits 15-9 to 1
|
AND #$01
|
||||||
|
EOR #$FF ; set offset bits 15-9 to 1
|
||||||
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
|
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
|
||||||
; (*same as JMP GOT_OFFSET_HI but shorter)
|
; (*same as JMP GOT_OFFSET_HI but shorter)
|
||||||
|
|
||||||
@ -119,15 +115,8 @@ REPMATCH_OR_LARGE_OFFSET
|
|||||||
|
|
||||||
; 10Z: 13 bit offset
|
; 10Z: 13 bit offset
|
||||||
|
|
||||||
LSR A ; shift Z (offset bit 4) in place
|
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
|
||||||
LSR A
|
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
|
||||||
AND #$10
|
|
||||||
STA FIXUP
|
|
||||||
|
|
||||||
JSR GETNIBBLE ; get nibble for offset bits 8-11
|
|
||||||
ORA FIXUP ; merge offset bit 12
|
|
||||||
CLC
|
|
||||||
ADC #$DE ; set bits 13-15 to 1 and subtract 2 (to subtract 512)
|
|
||||||
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
|
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
|
||||||
; (*same as JMP GOT_OFFSET_HI but shorter)
|
; (*same as JMP GOT_OFFSET_HI but shorter)
|
||||||
|
|
||||||
@ -160,7 +149,7 @@ REP_MATCH
|
|||||||
BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token
|
BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token
|
||||||
|
|
||||||
JSR GETNIBBLE ; get extra match length nibble
|
JSR GETNIBBLE ; get extra match length nibble
|
||||||
CLC ; add nibble to len from token
|
; add nibble to len from token
|
||||||
ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2)
|
ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2)
|
||||||
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
|
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
|
||||||
BNE PREPARE_COPY_MATCH ; if less, match length is complete
|
BNE PREPARE_COPY_MATCH ; if less, match length is complete
|
||||||
@ -194,23 +183,35 @@ GETMATCH_DONE
|
|||||||
BNE COPY_MATCH_LOOP
|
BNE COPY_MATCH_LOOP
|
||||||
JMP DECODE_TOKEN
|
JMP DECODE_TOKEN
|
||||||
|
|
||||||
GETNIBBLE
|
GETCOMBINEDBITS
|
||||||
DEC NIBCOUNT
|
STA FIXUP
|
||||||
BPL HAS_NIBBLES
|
|
||||||
|
|
||||||
LDA #$01
|
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
|
||||||
STA NIBCOUNT
|
BIT FIXUP ; merge Z bit as the carry bit (for offset bit 0)
|
||||||
|
BVS COMBINEDBITZ
|
||||||
|
SEC
|
||||||
|
COMBINEDBITZ
|
||||||
|
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
|
||||||
|
RTS
|
||||||
|
|
||||||
|
GETNIBBLE
|
||||||
|
LSR NIBCOUNT
|
||||||
|
BCS HAS_NIBBLES
|
||||||
|
|
||||||
|
INC NIBCOUNT
|
||||||
JSR GETSRC ; get 2 nibbles
|
JSR GETSRC ; get 2 nibbles
|
||||||
STA NIBBLES
|
STA NIBBLES
|
||||||
LSR A
|
LSR A
|
||||||
LSR A
|
LSR A
|
||||||
LSR A
|
LSR A
|
||||||
LSR A
|
LSR A
|
||||||
|
CLC
|
||||||
RTS
|
RTS
|
||||||
|
|
||||||
HAS_NIBBLES
|
HAS_NIBBLES
|
||||||
LDA NIBBLES
|
LDA NIBBLES
|
||||||
AND #$0F ; isolate low 4 bits of nibble
|
AND #$0F ; isolate low 4 bits of nibble
|
||||||
|
CLC
|
||||||
RTS
|
RTS
|
||||||
|
|
||||||
GETPUT
|
GETPUT
|
||||||
|
@ -75,10 +75,11 @@ lzsa2_decompress:
|
|||||||
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
|
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
|
||||||
mov al,020H ; shift Z (offset bit 4) in place
|
mov al,020H ; shift Z (offset bit 4) in place
|
||||||
and al,dl
|
and al,dl
|
||||||
shr al,1
|
shl al,2
|
||||||
call .get_nibble ; get nibble for offset bits 0-3
|
call .get_nibble ; get nibble for offset bits 0-3
|
||||||
or al,cl ; merge nibble
|
or al,cl ; merge nibble
|
||||||
or al,0E0H ; set offset bits 7-5 to 1
|
rol al,1
|
||||||
|
xor al,0E1H ; set offset bits 7-5 to 1
|
||||||
dec ah ; set offset bits 15-8 to 1
|
dec ah ; set offset bits 15-8 to 1
|
||||||
jmp short .get_match_length
|
jmp short .get_match_length
|
||||||
|
|
||||||
@ -87,7 +88,7 @@ lzsa2_decompress:
|
|||||||
lodsb ; get 8 bit offset from stream in A
|
lodsb ; get 8 bit offset from stream in A
|
||||||
dec ah ; set offset bits 15-8 to 1
|
dec ah ; set offset bits 15-8 to 1
|
||||||
test dl,020H ; test bit Z (offset bit 8)
|
test dl,020H ; test bit Z (offset bit 8)
|
||||||
jne .get_match_length
|
je .get_match_length
|
||||||
dec ah ; clear bit 8 if Z bit is clear
|
dec ah ; clear bit 8 if Z bit is clear
|
||||||
jmp short .get_match_length
|
jmp short .get_match_length
|
||||||
|
|
||||||
@ -99,10 +100,11 @@ lzsa2_decompress:
|
|||||||
|
|
||||||
mov ah,020H ; shift Z (offset bit 12) in place
|
mov ah,020H ; shift Z (offset bit 12) in place
|
||||||
and ah,dl
|
and ah,dl
|
||||||
shr ah,1
|
shl ah,2
|
||||||
call .get_nibble ; get nibble for offset bits 8-11
|
call .get_nibble ; get nibble for offset bits 8-11
|
||||||
or ah,cl ; merge nibble
|
or ah,cl ; merge nibble
|
||||||
or ah,0E0H ; set offset bits 15-13 to 1
|
rol ah,1
|
||||||
|
xor ah,0E1H ; set offset bits 15-13 to 1
|
||||||
sub ah,2 ; subtract 512
|
sub ah,2 ; subtract 512
|
||||||
lodsb ; load match offset bits 0-7
|
lodsb ; load match offset bits 0-7
|
||||||
jmp short .get_match_length
|
jmp short .get_match_length
|
||||||
|
@ -39,8 +39,8 @@
|
|||||||
* Load dictionary contents
|
* Load dictionary contents
|
||||||
*
|
*
|
||||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||||
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
|
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
|
||||||
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
@ -91,7 +91,7 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
|
|||||||
/**
|
/**
|
||||||
* Free dictionary contents
|
* Free dictionary contents
|
||||||
*
|
*
|
||||||
* @param pDictionaryData pointer to pointer to dictionary contents
|
* @param ppDictionaryData pointer to pointer to dictionary contents
|
||||||
*/
|
*/
|
||||||
void lzsa_dictionary_free(void **ppDictionaryData) {
|
void lzsa_dictionary_free(void **ppDictionaryData) {
|
||||||
if (*ppDictionaryData) {
|
if (*ppDictionaryData) {
|
||||||
|
@ -35,12 +35,16 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Load dictionary contents
|
* Load dictionary contents
|
||||||
*
|
*
|
||||||
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
* @param pszDictionaryFilename name of dictionary file, or NULL for none
|
||||||
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
|
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
|
||||||
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
@ -49,8 +53,12 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
|
|||||||
/**
|
/**
|
||||||
* Free dictionary contents
|
* Free dictionary contents
|
||||||
*
|
*
|
||||||
* @param pDictionaryData pointer to pointer to dictionary contents
|
* @param ppDictionaryData pointer to pointer to dictionary contents
|
||||||
*/
|
*/
|
||||||
void lzsa_dictionary_free(void **ppDictionaryData);
|
void lzsa_dictionary_free(void **ppDictionaryData);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _DICTIONARY_H */
|
#endif /* _DICTIONARY_H */
|
||||||
|
@ -113,7 +113,7 @@ static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppI
|
|||||||
* Decompress one LZSA1 data block
|
* Decompress one LZSA1 data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
* Decompress one LZSA1 data block
|
* Decompress one LZSA1 data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
|
@ -99,7 +99,7 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock
|
|||||||
* Decompress one LZSA2 data block
|
* Decompress one LZSA2 data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
@ -152,9 +152,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
|
|||||||
/* 5 bit offset */
|
/* 5 bit offset */
|
||||||
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
||||||
return -1;
|
return -1;
|
||||||
nMatchOffset = nValue;
|
nMatchOffset = nValue << 1;
|
||||||
nMatchOffset |= ((token & 0x20) >> 1);
|
nMatchOffset |= ((token & 0x20) >> 5);
|
||||||
nMatchOffset ^= 0x1f;
|
nMatchOffset ^= 0x1e;
|
||||||
nMatchOffset++;
|
nMatchOffset++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -162,7 +162,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
|
|||||||
/* 9 bit offset */
|
/* 9 bit offset */
|
||||||
nMatchOffset = (unsigned int)(*pInBlock++);
|
nMatchOffset = (unsigned int)(*pInBlock++);
|
||||||
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
|
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
|
||||||
nMatchOffset ^= 0x1ff;
|
nMatchOffset ^= 0x0ff;
|
||||||
nMatchOffset++;
|
nMatchOffset++;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
@ -171,9 +171,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
|
|||||||
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
|
||||||
return -1;
|
return -1;
|
||||||
nMatchOffset = (unsigned int)(*pInBlock++);
|
nMatchOffset = (unsigned int)(*pInBlock++);
|
||||||
nMatchOffset |= (nValue << 8);
|
nMatchOffset |= (nValue << 9);
|
||||||
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
|
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
|
||||||
nMatchOffset ^= 0x1fff;
|
nMatchOffset ^= 0x1eff;
|
||||||
nMatchOffset += (512 + 1);
|
nMatchOffset += (512 + 1);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
@ -37,7 +37,7 @@
|
|||||||
* Decompress one LZSA2 data block
|
* Decompress one LZSA2 data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
|
@ -40,14 +40,15 @@
|
|||||||
* Decompress one data block
|
* Decompress one data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
|
* @param nFormatVersion version of format to use (1-2)
|
||||||
*
|
*
|
||||||
* @return size of decompressed data in bytes, or -1 for error
|
* @return size of decompressed data in bytes, or -1 for error
|
||||||
*/
|
*/
|
||||||
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
|
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion) {
|
||||||
if (nFormatVersion == 1)
|
if (nFormatVersion == 1)
|
||||||
return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
|
return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
|
||||||
else if (nFormatVersion == 2)
|
else if (nFormatVersion == 2)
|
||||||
|
@ -35,17 +35,26 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Decompress one data block
|
* Decompress one data block
|
||||||
*
|
*
|
||||||
* @param pInBlock pointer to compressed data
|
* @param pInBlock pointer to compressed data
|
||||||
* @param nInBlockSize size of compressed data, in bytes
|
* @param nBlockSize size of compressed data, in bytes
|
||||||
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
|
||||||
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
|
||||||
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
* @param nBlockMaxSize total size of output decompression buffer, in bytes
|
||||||
|
* @param nFormatVersion version of format to use (1-2)
|
||||||
*
|
*
|
||||||
* @return size of decompressed data in bytes, or -1 for error
|
* @return size of decompressed data in bytes, or -1 for error
|
||||||
*/
|
*/
|
||||||
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
|
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _EXPAND_CONTEXT_H */
|
#endif /* _EXPAND_CONTEXT_H */
|
||||||
|
@ -135,7 +135,7 @@ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOut
|
|||||||
if ((pCurFileData + nBlockDataSize) > pEndFileData)
|
if ((pCurFileData + nBlockDataSize) > pEndFileData)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
|
nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), nFormatVersion);
|
||||||
if (nDecompressedSize < 0)
|
if (nDecompressedSize < 0)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
|
@ -35,6 +35,10 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get maximum decompressed size of compressed data
|
* Get maximum decompressed size of compressed data
|
||||||
*
|
*
|
||||||
@ -58,4 +62,8 @@ size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size
|
|||||||
*/
|
*/
|
||||||
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
|
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _EXPAND_INMEM_H */
|
#endif /* _EXPAND_INMEM_H */
|
||||||
|
@ -103,7 +103,6 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut
|
|||||||
*/
|
*/
|
||||||
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
||||||
long long *pOriginalSize, long long *pCompressedSize) {
|
long long *pOriginalSize, long long *pCompressedSize) {
|
||||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
|
||||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||||
unsigned char cFrameData[16];
|
unsigned char cFrameData[16];
|
||||||
unsigned char *pInBlock;
|
unsigned char *pInBlock;
|
||||||
@ -201,9 +200,7 @@ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pO
|
|||||||
nDecompressedSize = nBlockSize;
|
nDecompressedSize = nBlockSize;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
unsigned int nBlockOffs = 0;
|
nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion);
|
||||||
|
|
||||||
nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
|
|
||||||
if (nDecompressedSize < 0) {
|
if (nDecompressedSize < 0) {
|
||||||
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
|
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
|
||||||
break;
|
break;
|
||||||
|
@ -35,6 +35,10 @@
|
|||||||
|
|
||||||
#include "stream.h"
|
#include "stream.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
typedef enum _lzsa_status_t lzsa_status_t;
|
typedef enum _lzsa_status_t lzsa_status_t;
|
||||||
|
|
||||||
@ -75,4 +79,8 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut
|
|||||||
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
|
||||||
long long *pOriginalSize, long long *pCompressedSize);
|
long long *pOriginalSize, long long *pCompressedSize);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _EXPAND_STREAMING_H */
|
#endif /* _EXPAND_STREAMING_H */
|
||||||
|
@ -33,6 +33,10 @@
|
|||||||
#ifndef _FRAME_H
|
#ifndef _FRAME_H
|
||||||
#define _FRAME_H
|
#define _FRAME_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get compressed file header size
|
* Get compressed file header size
|
||||||
*
|
*
|
||||||
@ -111,4 +115,8 @@ int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize
|
|||||||
*/
|
*/
|
||||||
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
|
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _FRAME_H */
|
#endif /* _FRAME_H */
|
||||||
|
@ -44,6 +44,10 @@
|
|||||||
#include "expand_streaming.h"
|
#include "expand_streaming.h"
|
||||||
#include "expand_inmem.h"
|
#include "expand_inmem.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/** High level status for compression and decompression */
|
/** High level status for compression and decompression */
|
||||||
typedef enum _lzsa_status_t {
|
typedef enum _lzsa_status_t {
|
||||||
LZSA_OK = 0, /**< Success */
|
LZSA_OK = 0, /**< Success */
|
||||||
@ -66,4 +70,8 @@ typedef enum _lzsa_status_t {
|
|||||||
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
|
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
|
||||||
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
|
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _LIB_H */
|
#endif /* _LIB_H */
|
||||||
|
9
src/libdivsufsort/include/divsufsort_config.h
Normal file
9
src/libdivsufsort/include/divsufsort_config.h
Normal file
@ -0,0 +1,9 @@
|
|||||||
|
#define HAVE_STRING_H 1
|
||||||
|
#define HAVE_STDLIB_H 1
|
||||||
|
#define HAVE_MEMORY_H 1
|
||||||
|
#define HAVE_STDINT_H 1
|
||||||
|
#define INLINE inline
|
||||||
|
|
||||||
|
#ifdef _MSC_VER
|
||||||
|
#pragma warning( disable : 4244 )
|
||||||
|
#endif /* _MSC_VER */
|
@ -31,9 +31,7 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif /* __cplusplus */
|
#endif /* __cplusplus */
|
||||||
|
|
||||||
#if HAVE_CONFIG_H
|
#include "divsufsort_config.h"
|
||||||
# include "config.h"
|
|
||||||
#endif
|
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#if HAVE_STRING_H
|
#if HAVE_STRING_H
|
||||||
|
383
src/libdivsufsort/lib/divsufsort_utils.c
Normal file
383
src/libdivsufsort/lib/divsufsort_utils.c
Normal file
@ -0,0 +1,383 @@
|
|||||||
|
/*
|
||||||
|
* utils.c for libdivsufsort
|
||||||
|
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||||
|
*
|
||||||
|
* Permission is hereby granted, free of charge, to any person
|
||||||
|
* obtaining a copy of this software and associated documentation
|
||||||
|
* files (the "Software"), to deal in the Software without
|
||||||
|
* restriction, including without limitation the rights to use,
|
||||||
|
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||||
|
* copies of the Software, and to permit persons to whom the
|
||||||
|
* Software is furnished to do so, subject to the following
|
||||||
|
* conditions:
|
||||||
|
*
|
||||||
|
* The above copyright notice and this permission notice shall be
|
||||||
|
* included in all copies or substantial portions of the Software.
|
||||||
|
*
|
||||||
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||||
|
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||||
|
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||||
|
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||||
|
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||||
|
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||||
|
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||||
|
* OTHER DEALINGS IN THE SOFTWARE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "divsufsort_private.h"
|
||||||
|
|
||||||
|
|
||||||
|
/*- Private Function -*/
|
||||||
|
|
||||||
|
#if 0
|
||||||
|
/* Binary search for inverse bwt. */
|
||||||
|
static
|
||||||
|
saidx_t
|
||||||
|
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
|
||||||
|
saidx_t half, i;
|
||||||
|
for(i = 0, half = size >> 1;
|
||||||
|
0 < size;
|
||||||
|
size = half, half >>= 1) {
|
||||||
|
if(A[i + half] < value) {
|
||||||
|
i += half + 1;
|
||||||
|
half -= (size & 1) ^ 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
/*- Functions -*/
|
||||||
|
|
||||||
|
/* Burrows-Wheeler transform. */
|
||||||
|
saint_t
|
||||||
|
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
|
||||||
|
saidx_t n, saidx_t *idx) {
|
||||||
|
saidx_t *A, i, j, p, t;
|
||||||
|
saint_t c;
|
||||||
|
|
||||||
|
/* Check arguments. */
|
||||||
|
if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
|
||||||
|
if(n <= 1) {
|
||||||
|
if(n == 1) { U[0] = T[0]; }
|
||||||
|
*idx = n;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
if((A = SA) == NULL) {
|
||||||
|
i = divbwt(T, U, NULL, n);
|
||||||
|
if(0 <= i) { *idx = i; i = 0; }
|
||||||
|
return (saint_t)i;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* BW transform. */
|
||||||
|
if(T == U) {
|
||||||
|
t = n;
|
||||||
|
for(i = 0, j = 0; i < n; ++i) {
|
||||||
|
p = t - 1;
|
||||||
|
t = A[i];
|
||||||
|
if(0 <= p) {
|
||||||
|
c = T[j];
|
||||||
|
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
|
||||||
|
A[j] = c;
|
||||||
|
j++;
|
||||||
|
} else {
|
||||||
|
*idx = i;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
p = t - 1;
|
||||||
|
if(0 <= p) {
|
||||||
|
c = T[j];
|
||||||
|
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
|
||||||
|
A[j] = c;
|
||||||
|
} else {
|
||||||
|
*idx = i;
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
U[0] = T[n - 1];
|
||||||
|
for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
|
||||||
|
*idx = i + 1;
|
||||||
|
for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
|
||||||
|
}
|
||||||
|
|
||||||
|
if(SA == NULL) {
|
||||||
|
/* Deallocate memory. */
|
||||||
|
free(A);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inverse Burrows-Wheeler transform. */
|
||||||
|
saint_t
|
||||||
|
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
|
||||||
|
saidx_t n, saidx_t idx) {
|
||||||
|
saidx_t C[ALPHABET_SIZE];
|
||||||
|
sauchar_t D[ALPHABET_SIZE];
|
||||||
|
saidx_t *B;
|
||||||
|
saidx_t i, p;
|
||||||
|
saint_t c, d;
|
||||||
|
|
||||||
|
/* Check arguments. */
|
||||||
|
if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
|
||||||
|
(n < idx) || ((0 < n) && (idx == 0))) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(n <= 1) { return 0; }
|
||||||
|
|
||||||
|
if((B = A) == NULL) {
|
||||||
|
/* Allocate n*sizeof(saidx_t) bytes of memory. */
|
||||||
|
if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Inverse BW transform. */
|
||||||
|
for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
|
||||||
|
for(i = 0; i < n; ++i) { ++C[T[i]]; }
|
||||||
|
for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
|
||||||
|
p = C[c];
|
||||||
|
if(0 < p) {
|
||||||
|
C[c] = i;
|
||||||
|
D[d++] = (sauchar_t)c;
|
||||||
|
i += p;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
|
||||||
|
for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
|
||||||
|
for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
|
||||||
|
for(i = 0, p = idx; i < n; ++i) {
|
||||||
|
U[i] = D[binarysearch_lower(C, d, p)];
|
||||||
|
p = B[p - 1];
|
||||||
|
}
|
||||||
|
|
||||||
|
if(A == NULL) {
|
||||||
|
/* Deallocate memory. */
|
||||||
|
free(B);
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Checks the suffix array SA of the string T. */
|
||||||
|
saint_t
|
||||||
|
sufcheck(const sauchar_t *T, const saidx_t *SA,
|
||||||
|
saidx_t n, saint_t verbose) {
|
||||||
|
saidx_t C[ALPHABET_SIZE];
|
||||||
|
saidx_t i, p, q, t;
|
||||||
|
saint_t c;
|
||||||
|
|
||||||
|
if(verbose) { fprintf(stderr, "sufcheck: "); }
|
||||||
|
|
||||||
|
/* Check arguments. */
|
||||||
|
if((T == NULL) || (SA == NULL) || (n < 0)) {
|
||||||
|
if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
if(n == 0) {
|
||||||
|
if(verbose) { fprintf(stderr, "Done.\n"); }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check range: [0..n-1] */
|
||||||
|
for(i = 0; i < n; ++i) {
|
||||||
|
if((SA[i] < 0) || (n <= SA[i])) {
|
||||||
|
if(verbose) {
|
||||||
|
fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
|
||||||
|
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
|
||||||
|
n - 1, i, SA[i]);
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check first characters. */
|
||||||
|
for(i = 1; i < n; ++i) {
|
||||||
|
if(T[SA[i - 1]] > T[SA[i]]) {
|
||||||
|
if(verbose) {
|
||||||
|
fprintf(stderr, "Suffixes in wrong order.\n"
|
||||||
|
" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
|
||||||
|
" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
|
||||||
|
i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
|
||||||
|
}
|
||||||
|
return -3;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* check suffixes. */
|
||||||
|
for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
|
||||||
|
for(i = 0; i < n; ++i) { ++C[T[i]]; }
|
||||||
|
for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
|
||||||
|
t = C[i];
|
||||||
|
C[i] = p;
|
||||||
|
p += t;
|
||||||
|
}
|
||||||
|
|
||||||
|
q = C[T[n - 1]];
|
||||||
|
C[T[n - 1]] += 1;
|
||||||
|
for(i = 0; i < n; ++i) {
|
||||||
|
p = SA[i];
|
||||||
|
if(0 < p) {
|
||||||
|
c = T[--p];
|
||||||
|
t = C[c];
|
||||||
|
} else {
|
||||||
|
c = T[p = n - 1];
|
||||||
|
t = q;
|
||||||
|
}
|
||||||
|
if((t < 0) || (p != SA[t])) {
|
||||||
|
if(verbose) {
|
||||||
|
fprintf(stderr, "Suffix in wrong position.\n"
|
||||||
|
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
|
||||||
|
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
|
||||||
|
t, (0 <= t) ? SA[t] : -1, i, SA[i]);
|
||||||
|
}
|
||||||
|
return -4;
|
||||||
|
}
|
||||||
|
if(t != q) {
|
||||||
|
++C[c];
|
||||||
|
if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static
|
||||||
|
int
|
||||||
|
_compare(const sauchar_t *T, saidx_t Tsize,
|
||||||
|
const sauchar_t *P, saidx_t Psize,
|
||||||
|
saidx_t suf, saidx_t *match) {
|
||||||
|
saidx_t i, j;
|
||||||
|
saint_t r;
|
||||||
|
for(i = suf + *match, j = *match, r = 0;
|
||||||
|
(i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
|
||||||
|
*match = j;
|
||||||
|
return (r == 0) ? -(j != Psize) : r;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Search for the pattern P in the string T. */
|
||||||
|
saidx_t
|
||||||
|
sa_search(const sauchar_t *T, saidx_t Tsize,
|
||||||
|
const sauchar_t *P, saidx_t Psize,
|
||||||
|
const saidx_t *SA, saidx_t SAsize,
|
||||||
|
saidx_t *idx) {
|
||||||
|
saidx_t size, lsize, rsize, half;
|
||||||
|
saidx_t match, lmatch, rmatch;
|
||||||
|
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
|
||||||
|
saidx_t i, j, k;
|
||||||
|
saint_t r;
|
||||||
|
|
||||||
|
if(idx != NULL) { *idx = -1; }
|
||||||
|
if((T == NULL) || (P == NULL) || (SA == NULL) ||
|
||||||
|
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
|
||||||
|
if((Tsize == 0) || (SAsize == 0)) { return 0; }
|
||||||
|
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
|
||||||
|
|
||||||
|
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
|
||||||
|
0 < size;
|
||||||
|
size = half, half >>= 1) {
|
||||||
|
match = MIN(lmatch, rmatch);
|
||||||
|
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
|
||||||
|
if(r < 0) {
|
||||||
|
i += half + 1;
|
||||||
|
half -= (size & 1) ^ 1;
|
||||||
|
lmatch = match;
|
||||||
|
} else if(r > 0) {
|
||||||
|
rmatch = match;
|
||||||
|
} else {
|
||||||
|
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
|
||||||
|
|
||||||
|
/* left part */
|
||||||
|
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
|
||||||
|
0 < lsize;
|
||||||
|
lsize = half, half >>= 1) {
|
||||||
|
lmatch = MIN(llmatch, lrmatch);
|
||||||
|
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
|
||||||
|
if(r < 0) {
|
||||||
|
j += half + 1;
|
||||||
|
half -= (lsize & 1) ^ 1;
|
||||||
|
llmatch = lmatch;
|
||||||
|
} else {
|
||||||
|
lrmatch = lmatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* right part */
|
||||||
|
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
|
||||||
|
0 < rsize;
|
||||||
|
rsize = half, half >>= 1) {
|
||||||
|
rmatch = MIN(rlmatch, rrmatch);
|
||||||
|
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
|
||||||
|
if(r <= 0) {
|
||||||
|
k += half + 1;
|
||||||
|
half -= (rsize & 1) ^ 1;
|
||||||
|
rlmatch = rmatch;
|
||||||
|
} else {
|
||||||
|
rrmatch = rmatch;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
|
||||||
|
return k - j;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Search for the character c in the string T. */
|
||||||
|
saidx_t
|
||||||
|
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
|
||||||
|
const saidx_t *SA, saidx_t SAsize,
|
||||||
|
saint_t c, saidx_t *idx) {
|
||||||
|
saidx_t size, lsize, rsize, half;
|
||||||
|
saidx_t i, j, k, p;
|
||||||
|
saint_t r;
|
||||||
|
|
||||||
|
if(idx != NULL) { *idx = -1; }
|
||||||
|
if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
|
||||||
|
if((Tsize == 0) || (SAsize == 0)) { return 0; }
|
||||||
|
|
||||||
|
for(i = j = k = 0, size = SAsize, half = size >> 1;
|
||||||
|
0 < size;
|
||||||
|
size = half, half >>= 1) {
|
||||||
|
p = SA[i + half];
|
||||||
|
r = (p < Tsize) ? T[p] - c : -1;
|
||||||
|
if(r < 0) {
|
||||||
|
i += half + 1;
|
||||||
|
half -= (size & 1) ^ 1;
|
||||||
|
} else if(r == 0) {
|
||||||
|
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
|
||||||
|
|
||||||
|
/* left part */
|
||||||
|
for(half = lsize >> 1;
|
||||||
|
0 < lsize;
|
||||||
|
lsize = half, half >>= 1) {
|
||||||
|
p = SA[j + half];
|
||||||
|
r = (p < Tsize) ? T[p] - c : -1;
|
||||||
|
if(r < 0) {
|
||||||
|
j += half + 1;
|
||||||
|
half -= (lsize & 1) ^ 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* right part */
|
||||||
|
for(half = rsize >> 1;
|
||||||
|
0 < rsize;
|
||||||
|
rsize = half, half >>= 1) {
|
||||||
|
p = SA[k + half];
|
||||||
|
r = (p < Tsize) ? T[p] - c : -1;
|
||||||
|
if(r <= 0) {
|
||||||
|
k += half + 1;
|
||||||
|
half -= (rsize & 1) ^ 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
|
||||||
|
return k - j;
|
||||||
|
}
|
||||||
|
#endif
|
@ -772,7 +772,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
|
|||||||
for (i = 0; i < 50; i++) {
|
for (i = 0; i < 50; i++) {
|
||||||
long long t0 = do_get_time();
|
long long t0 = do_get_time();
|
||||||
if (nOptions & OPT_RAW)
|
if (nOptions & OPT_RAW)
|
||||||
nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
|
nActualDecompressedSize = lzsa_decompressor_expand_block(pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize, nFormatVersion);
|
||||||
else
|
else
|
||||||
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
|
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
|
||||||
long long t1 = do_get_time();
|
long long t1 = do_get_time();
|
||||||
|
@ -33,6 +33,10 @@
|
|||||||
#ifndef _MATCHFINDER_H
|
#ifndef _MATCHFINDER_H
|
||||||
#define _MATCHFINDER_H
|
#define _MATCHFINDER_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Forward declarations */
|
/* Forward declarations */
|
||||||
typedef struct _lzsa_match lzsa_match;
|
typedef struct _lzsa_match lzsa_match;
|
||||||
typedef struct _lzsa_compressor lzsa_compressor;
|
typedef struct _lzsa_compressor lzsa_compressor;
|
||||||
@ -79,4 +83,8 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con
|
|||||||
*/
|
*/
|
||||||
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _MATCHFINDER_H */
|
#endif /* _MATCHFINDER_H */
|
||||||
|
@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
|
|||||||
*
|
*
|
||||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||||
*/
|
*/
|
||||||
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||||
|
|
||||||
#endif /* _SHRINK_BLOCK_V1_H */
|
#endif /* _SHRINK_BLOCK_V1_H */
|
||||||
|
@ -566,7 +566,6 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
int nOutOffset = 0;
|
int nOutOffset = 0;
|
||||||
int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
|
int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
|
||||||
int nRepMatchOffset = 0;
|
int nRepMatchOffset = 0;
|
||||||
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
|
|
||||||
|
|
||||||
for (i = nStartOffset; i < nEndOffset; ) {
|
for (i = nStartOffset; i < nEndOffset; ) {
|
||||||
lzsa_match *pMatch = pCompressor->best_match + i;
|
lzsa_match *pMatch = pCompressor->best_match + i;
|
||||||
@ -586,15 +585,15 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
if (nMatchOffset <= 32) {
|
if (nMatchOffset <= 32) {
|
||||||
nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
|
nTokenOffsetMode = 0x00 | ((((-nMatchOffset) & 0x01) << 5) ^ 0x20);
|
||||||
nOffsetSize = 4;
|
nOffsetSize = 4;
|
||||||
}
|
}
|
||||||
else if (nMatchOffset <= 512) {
|
else if (nMatchOffset <= 512) {
|
||||||
nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
|
nTokenOffsetMode = 0x40 | ((((-nMatchOffset) & 0x100) >> 3) ^ 0x20);
|
||||||
nOffsetSize = 8;
|
nOffsetSize = 8;
|
||||||
}
|
}
|
||||||
else if (nMatchOffset <= (8192 + 512)) {
|
else if (nMatchOffset <= (8192 + 512)) {
|
||||||
nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
|
nTokenOffsetMode = 0x80 | ((((-(nMatchOffset - 512)) & 0x0100) >> 3) ^ 0x20);
|
||||||
nOffsetSize = 12;
|
nOffsetSize = 12;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
@ -621,14 +620,14 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
|
if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
|
||||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
|
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-nMatchOffset) & 0x1e) >> 1);
|
||||||
if (nOutOffset < 0) return -1;
|
if (nOutOffset < 0) return -1;
|
||||||
}
|
}
|
||||||
else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
|
else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
|
||||||
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
||||||
}
|
}
|
||||||
else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
|
else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
|
||||||
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
|
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 9) & 0x0f);
|
||||||
if (nOutOffset < 0) return -1;
|
if (nOutOffset < 0) return -1;
|
||||||
pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
|
pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
|
|||||||
*
|
*
|
||||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||||
*/
|
*/
|
||||||
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
|
||||||
|
|
||||||
#endif /* _SHRINK_BLOCK_V2_H */
|
#endif /* _SHRINK_BLOCK_V2_H */
|
||||||
|
@ -35,6 +35,10 @@
|
|||||||
|
|
||||||
#include "divsufsort.h"
|
#include "divsufsort.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
#define LCP_BITS 15
|
#define LCP_BITS 15
|
||||||
#define LCP_MAX (1U<<(LCP_BITS - 1))
|
#define LCP_MAX (1U<<(LCP_BITS - 1))
|
||||||
#define LCP_SHIFT (32-LCP_BITS)
|
#define LCP_SHIFT (32-LCP_BITS)
|
||||||
@ -120,4 +124,8 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned ch
|
|||||||
*/
|
*/
|
||||||
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
|
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SHRINK_CONTEXT_H */
|
#endif /* _SHRINK_CONTEXT_H */
|
||||||
|
@ -41,8 +41,7 @@
|
|||||||
/**
|
/**
|
||||||
* Get maximum compressed size of input(source) data
|
* Get maximum compressed size of input(source) data
|
||||||
*
|
*
|
||||||
* @param pFileData pointer to input(source) data
|
* @param nInputSize input(source) size in bytes
|
||||||
* @param nFileSize input(source) size in bytes
|
|
||||||
*
|
*
|
||||||
* @return maximum compressed size
|
* @return maximum compressed size
|
||||||
*/
|
*/
|
||||||
|
@ -35,11 +35,14 @@
|
|||||||
|
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Get maximum compressed size of input(source) data
|
* Get maximum compressed size of input(source) data
|
||||||
*
|
*
|
||||||
* @param pFileData pointer to input(source) data
|
* @param nInputSize input(source) size in bytes
|
||||||
* @param nFileSize input(source) size in bytes
|
|
||||||
*
|
*
|
||||||
* @return maximum compressed size
|
* @return maximum compressed size
|
||||||
*/
|
*/
|
||||||
@ -61,4 +64,8 @@ size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
|
|||||||
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
|
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
|
||||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
|
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SHRINK_INMEM_H */
|
#endif /* _SHRINK_INMEM_H */
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* shrink_streaming.h - streaming compression definitions
|
* shrink_streaming.c - streaming compression implementation
|
||||||
*
|
*
|
||||||
* Copyright (C) 2019 Emmanuel Marty
|
* Copyright (C) 2019 Emmanuel Marty
|
||||||
*
|
*
|
||||||
@ -111,7 +111,6 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
|
|||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
|
||||||
unsigned char *pInData, *pOutData;
|
unsigned char *pInData, *pOutData;
|
||||||
lzsa_compressor compressor;
|
lzsa_compressor compressor;
|
||||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
|
||||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||||
int nResult;
|
int nResult;
|
||||||
unsigned char cFrameData[16];
|
unsigned char cFrameData[16];
|
||||||
|
@ -35,6 +35,10 @@
|
|||||||
|
|
||||||
#include "stream.h"
|
#include "stream.h"
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
typedef enum _lzsa_status_t lzsa_status_t;
|
typedef enum _lzsa_status_t lzsa_status_t;
|
||||||
|
|
||||||
@ -83,4 +87,8 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
|
|||||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _SHRINK_STREAMING_H */
|
#endif /* _SHRINK_STREAMING_H */
|
||||||
|
@ -33,6 +33,10 @@
|
|||||||
#ifndef _STREAM_H
|
#ifndef _STREAM_H
|
||||||
#define _STREAM_H
|
#define _STREAM_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
typedef struct _lzsa_stream_t lzsa_stream_t;
|
typedef struct _lzsa_stream_t lzsa_stream_t;
|
||||||
|
|
||||||
@ -92,4 +96,8 @@ typedef struct _lzsa_stream_t {
|
|||||||
*/
|
*/
|
||||||
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
|
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
#endif /* _STREAM_H */
|
#endif /* _STREAM_H */
|
||||||
|
Loading…
x
Reference in New Issue
Block a user