diff --git a/Makefile b/Makefile index 2181554..9e98565 100755 --- a/Makefile +++ b/Makefile @@ -1,5 +1,5 @@ CC=clang -CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc -DHAVE_CONFIG_H +CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc OBJDIR=obj LDFLAGS= STRIP=strip @@ -26,9 +26,9 @@ OBJS += $(OBJDIR)/src/shrink_inmem.o OBJS += $(OBJDIR)/src/shrink_streaming.o OBJS += $(OBJDIR)/src/stream.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o +OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o -OBJS += $(OBJDIR)/src/libdivsufsort/lib/utils.o all: $(APP) diff --git a/asm/6502/decompress_v2.asm b/asm/6502/decompress_v2.asm index 6514aaf..3bd3093 100755 --- a/asm/6502/decompress_v2.asm +++ b/asm/6502/decompress_v2.asm @@ -50,7 +50,7 @@ DECODE_TOKEN BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token JSR GETNIBBLE ; get extra literals length nibble - CLC ; add nibble to len from token + ; add nibble to len from token ADC #$03 ; (LITERALS_RUN_LEN_V2) CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ? BNE PREPARE_COPY_LITERALS ; if less, literals count is complete @@ -92,24 +92,20 @@ NO_LITERALS BMI OFFSET_9_BIT ; 00Z: 5 bit offset - LSR A ; Shift Z (offset bit 4) in place - LSR A - AND #$10 - STA FIXUP - LDA #$0FF ; set offset bits 15-8 to 1 - STA OFFSHI + LDX #$0FF ; set offset bits 15-8 to 1 + STX OFFSHI - JSR GETNIBBLE ; get nibble for offset bits 0-3 - ORA FIXUP ; merge offset bit 4 - ORA #$E0 ; set offset bits 7-5 to 1 - JMP GOT_OFFSET_LO ; go store low byte of match offset and prepare match + JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1 + ORA #$E0 ; set bits 7-5 to 1 + BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match OFFSET_9_BIT ; 01Z: 9 bit offset ASL ; shift Z (offset bit 8) in place ROL ROL - ORA #$FE ; set offset bits 15-9 to 1 + AND #$01 + EOR #$FF ; set offset bits 15-9 to 1 BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match ; (*same as JMP GOT_OFFSET_HI but shorter) @@ -119,15 +115,8 @@ REPMATCH_OR_LARGE_OFFSET ; 10Z: 13 bit offset - LSR A ; shift Z (offset bit 4) in place - LSR A - AND #$10 - STA FIXUP - - JSR GETNIBBLE ; get nibble for offset bits 8-11 - ORA FIXUP ; merge offset bit 12 - CLC - ADC #$DE ; set bits 13-15 to 1 and substract 2 (to substract 512) + JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9 + ADC #$DE ; set bits 15-13 to 1 and substract 2 (to substract 512) BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match ; (*same as JMP GOT_OFFSET_HI but shorter) @@ -160,7 +149,7 @@ REP_MATCH BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token JSR GETNIBBLE ; get extra match length nibble - CLC ; add nibble to len from token + ; add nibble to len from token ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2) CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15? BNE PREPARE_COPY_MATCH ; if less, match length is complete @@ -194,23 +183,35 @@ GETMATCH_DONE BNE COPY_MATCH_LOOP JMP DECODE_TOKEN -GETNIBBLE - DEC NIBCOUNT - BPL HAS_NIBBLES +GETCOMBINEDBITS + STA FIXUP - LDA #$01 - STA NIBCOUNT + JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4) + BIT FIXUP ; merge Z bit as the carry bit (for offset bit 0) + BVS COMBINEDBITZ + SEC +COMBINEDBITZ + ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared + RTS + +GETNIBBLE + LSR NIBCOUNT + BCS HAS_NIBBLES + + INC NIBCOUNT JSR GETSRC ; get 2 nibbles STA NIBBLES LSR A LSR A LSR A LSR A + CLC RTS HAS_NIBBLES LDA NIBBLES AND #$0F ; isolate low 4 bits of nibble + CLC RTS GETPUT diff --git a/asm/8088/decompress_small_v2.S b/asm/8088/decompress_small_v2.S index cea0c74..03d883b 100755 --- a/asm/8088/decompress_small_v2.S +++ b/asm/8088/decompress_small_v2.S @@ -75,10 +75,11 @@ lzsa2_decompress: xchg ax,cx ; clear ah - cx is zero from the rep movsb above mov al,020H ; shift Z (offset bit 4) in place and al,dl - shr al,1 + shl al,2 call .get_nibble ; get nibble for offset bits 0-3 or al,cl ; merge nibble - or al,0E0H ; set offset bits 7-5 to 1 + rol al,1 + xor al,0E1H ; set offset bits 7-5 to 1 dec ah ; set offset bits 15-8 to 1 jmp short .get_match_length @@ -87,7 +88,7 @@ lzsa2_decompress: lodsb ; get 8 bit offset from stream in A dec ah ; set offset bits 15-8 to 1 test dl,020H ; test bit Z (offset bit 8) - jne .get_match_length + je .get_match_length dec ah ; clear bit 8 if Z bit is clear jmp short .get_match_length @@ -99,10 +100,11 @@ lzsa2_decompress: mov ah,020H ; shift Z (offset bit 12) in place and ah,dl - shr ah,1 + shl ah,2 call .get_nibble ; get nibble for offset bits 8-11 or ah,cl ; merge nibble - or ah,0E0H ; set offset bits 15-13 to 1 + rol ah,1 + xor ah,0E1H ; set offset bits 15-13 to 1 sub ah,2 ; substract 512 lodsb ; load match offset bits 0-7 jmp short .get_match_length diff --git a/src/dictionary.c b/src/dictionary.c index 4f10cc4..1b1fe3b 100644 --- a/src/dictionary.c +++ b/src/dictionary.c @@ -39,8 +39,8 @@ * Load dictionary contents * * @param pszDictionaryFilename name of dictionary file, or NULL for none - * @param pDictionaryData pointer to returned dictionary contents, or NULL for none - * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * @param ppDictionaryData pointer to returned dictionary contents, or NULL for none + * @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0 * * @return LZSA_OK for success, or an error value from lzsa_status_t */ @@ -91,7 +91,7 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD /** * Free dictionary contents * - * @param pDictionaryData pointer to pointer to dictionary contents + * @param ppDictionaryData pointer to pointer to dictionary contents */ void lzsa_dictionary_free(void **ppDictionaryData) { if (*ppDictionaryData) { diff --git a/src/dictionary.h b/src/dictionary.h index 9e61296..a363564 100644 --- a/src/dictionary.h +++ b/src/dictionary.h @@ -35,12 +35,16 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Load dictionary contents * * @param pszDictionaryFilename name of dictionary file, or NULL for none - * @param pDictionaryData pointer to returned dictionary contents, or NULL for none - * @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0 + * @param ppDictionaryData pointer to returned dictionary contents, or NULL for none + * @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0 * * @return LZSA_OK for success, or an error value from lzsa_status_t */ @@ -49,8 +53,12 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD /** * Free dictionary contents * - * @param pDictionaryData pointer to pointer to dictionary contents + * @param ppDictionaryData pointer to pointer to dictionary contents */ void lzsa_dictionary_free(void **ppDictionaryData); +#ifdef __cplusplus +} +#endif + #endif /* _DICTIONARY_H */ diff --git a/src/expand_block_v1.c b/src/expand_block_v1.c index 15f70d4..e3fca96 100644 --- a/src/expand_block_v1.c +++ b/src/expand_block_v1.c @@ -113,7 +113,7 @@ static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppI * Decompress one LZSA1 data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes diff --git a/src/expand_block_v1.h b/src/expand_block_v1.h index ac801ca..82bac6e 100644 --- a/src/expand_block_v1.h +++ b/src/expand_block_v1.h @@ -37,7 +37,7 @@ * Decompress one LZSA1 data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes diff --git a/src/expand_block_v2.c b/src/expand_block_v2.c index 1e7d690..05cfd92 100644 --- a/src/expand_block_v2.c +++ b/src/expand_block_v2.c @@ -99,7 +99,7 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock * Decompress one LZSA2 data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes @@ -152,9 +152,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS /* 5 bit offset */ if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) return -1; - nMatchOffset = nValue; - nMatchOffset |= ((token & 0x20) >> 1); - nMatchOffset ^= 0x1f; + nMatchOffset = nValue << 1; + nMatchOffset |= ((token & 0x20) >> 5); + nMatchOffset ^= 0x1e; nMatchOffset++; break; @@ -162,7 +162,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS /* 9 bit offset */ nMatchOffset = (unsigned int)(*pInBlock++); nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); - nMatchOffset ^= 0x1ff; + nMatchOffset ^= 0x0ff; nMatchOffset++; break; @@ -171,9 +171,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue)) return -1; nMatchOffset = (unsigned int)(*pInBlock++); - nMatchOffset |= (nValue << 8); - nMatchOffset |= (((unsigned int)(token & 0x20)) << 7); - nMatchOffset ^= 0x1fff; + nMatchOffset |= (nValue << 9); + nMatchOffset |= (((unsigned int)(token & 0x20)) << 3); + nMatchOffset ^= 0x1eff; nMatchOffset += (512 + 1); break; diff --git a/src/expand_block_v2.h b/src/expand_block_v2.h index e2c8fdb..5ccc0ca 100644 --- a/src/expand_block_v2.h +++ b/src/expand_block_v2.h @@ -37,7 +37,7 @@ * Decompress one LZSA2 data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes diff --git a/src/expand_context.c b/src/expand_context.c index 42c7fd9..3178111 100644 --- a/src/expand_context.c +++ b/src/expand_context.c @@ -40,14 +40,15 @@ * Decompress one data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes + * @param nFormatVersion version of format to use (1-2) * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) { +int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion) { if (nFormatVersion == 1) return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize); else if (nFormatVersion == 2) diff --git a/src/expand_context.h b/src/expand_context.h index 302d261..9b60c6f 100644 --- a/src/expand_context.h +++ b/src/expand_context.h @@ -35,17 +35,26 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Decompress one data block * * @param pInBlock pointer to compressed data - * @param nInBlockSize size of compressed data, in bytes + * @param nBlockSize size of compressed data, in bytes * @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block) * @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes) * @param nBlockMaxSize total size of output decompression buffer, in bytes + * @param nFormatVersion version of format to use (1-2) * * @return size of decompressed data in bytes, or -1 for error */ -int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize); +int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion); + +#ifdef __cplusplus +} +#endif #endif /* _EXPAND_CONTEXT_H */ diff --git a/src/expand_inmem.c b/src/expand_inmem.c index c5da3c8..3902aea 100644 --- a/src/expand_inmem.c +++ b/src/expand_inmem.c @@ -135,7 +135,7 @@ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOut if ((pCurFileData + nBlockDataSize) > pEndFileData) return -1; - nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize)); + nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), nFormatVersion); if (nDecompressedSize < 0) return -1; diff --git a/src/expand_inmem.h b/src/expand_inmem.h index a2baee1..44d30b7 100644 --- a/src/expand_inmem.h +++ b/src/expand_inmem.h @@ -35,6 +35,10 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Get maximum decompressed size of compressed data * @@ -58,4 +62,8 @@ size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size */ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion); +#ifdef __cplusplus +} +#endif + #endif /* _EXPAND_INMEM_H */ diff --git a/src/expand_streaming.c b/src/expand_streaming.c index d4b04ba..f96755b 100644 --- a/src/expand_streaming.c +++ b/src/expand_streaming.c @@ -103,7 +103,6 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut */ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, long long *pOriginalSize, long long *pCompressedSize) { - long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL, nCompressedSize = 0LL; unsigned char cFrameData[16]; unsigned char *pInBlock; @@ -201,9 +200,7 @@ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pO nDecompressedSize = nBlockSize; } else { - unsigned int nBlockOffs = 0; - - nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE); + nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion); if (nDecompressedSize < 0) { nDecompressionError = LZSA_ERROR_DECOMPRESSION; break; diff --git a/src/expand_streaming.h b/src/expand_streaming.h index 9958b82..30dd88d 100644 --- a/src/expand_streaming.h +++ b/src/expand_streaming.h @@ -35,6 +35,10 @@ #include "stream.h" +#ifdef __cplusplus +extern "C" { +#endif + /* Forward declaration */ typedef enum _lzsa_status_t lzsa_status_t; @@ -75,4 +79,8 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion, long long *pOriginalSize, long long *pCompressedSize); +#ifdef __cplusplus +} +#endif + #endif /* _EXPAND_STREAMING_H */ diff --git a/src/frame.h b/src/frame.h index 520b065..7b5db99 100644 --- a/src/frame.h +++ b/src/frame.h @@ -33,6 +33,10 @@ #ifndef _FRAME_H #define _FRAME_H +#ifdef __cplusplus +extern "C" { +#endif + /** * Get compressed file header size * @@ -111,4 +115,8 @@ int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize */ int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed); +#ifdef __cplusplus +} +#endif + #endif /* _FRAME_H */ diff --git a/src/lib.h b/src/lib.h index 60cda13..141469c 100755 --- a/src/lib.h +++ b/src/lib.h @@ -44,6 +44,10 @@ #include "expand_streaming.h" #include "expand_inmem.h" +#ifdef __cplusplus +extern "C" { +#endif + /** High level status for compression and decompression */ typedef enum _lzsa_status_t { LZSA_OK = 0, /**< Success */ @@ -66,4 +70,8 @@ typedef enum _lzsa_status_t { #define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */ #define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */ +#ifdef __cplusplus +} +#endif + #endif /* _LIB_H */ diff --git a/src/libdivsufsort/include/divsufsort_config.h b/src/libdivsufsort/include/divsufsort_config.h new file mode 100644 index 0000000..4054a8a --- /dev/null +++ b/src/libdivsufsort/include/divsufsort_config.h @@ -0,0 +1,9 @@ +#define HAVE_STRING_H 1 +#define HAVE_STDLIB_H 1 +#define HAVE_MEMORY_H 1 +#define HAVE_STDINT_H 1 +#define INLINE inline + +#ifdef _MSC_VER +#pragma warning( disable : 4244 ) +#endif /* _MSC_VER */ diff --git a/src/libdivsufsort/include/divsufsort_private.h b/src/libdivsufsort/include/divsufsort_private.h index 7e261c1..b4d97ad 100644 --- a/src/libdivsufsort/include/divsufsort_private.h +++ b/src/libdivsufsort/include/divsufsort_private.h @@ -31,9 +31,7 @@ extern "C" { #endif /* __cplusplus */ -#if HAVE_CONFIG_H -# include "config.h" -#endif +#include "divsufsort_config.h" #include #include #if HAVE_STRING_H diff --git a/src/libdivsufsort/lib/divsufsort_utils.c b/src/libdivsufsort/lib/divsufsort_utils.c new file mode 100644 index 0000000..f7cbc0d --- /dev/null +++ b/src/libdivsufsort/lib/divsufsort_utils.c @@ -0,0 +1,383 @@ +/* + * utils.c for libdivsufsort + * Copyright (c) 2003-2008 Yuta Mori All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, + * copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following + * conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES + * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT + * HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "divsufsort_private.h" + + +/*- Private Function -*/ + +#if 0 +/* Binary search for inverse bwt. */ +static +saidx_t +binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) { + saidx_t half, i; + for(i = 0, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + if(A[i + half] < value) { + i += half + 1; + half -= (size & 1) ^ 1; + } + } + return i; +} + + +/*- Functions -*/ + +/* Burrows-Wheeler transform. */ +saint_t +bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA, + saidx_t n, saidx_t *idx) { + saidx_t *A, i, j, p, t; + saint_t c; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; } + if(n <= 1) { + if(n == 1) { U[0] = T[0]; } + *idx = n; + return 0; + } + + if((A = SA) == NULL) { + i = divbwt(T, U, NULL, n); + if(0 <= i) { *idx = i; i = 0; } + return (saint_t)i; + } + + /* BW transform. */ + if(T == U) { + t = n; + for(i = 0, j = 0; i < n; ++i) { + p = t - 1; + t = A[i]; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + j++; + } else { + *idx = i; + } + } + p = t - 1; + if(0 <= p) { + c = T[j]; + U[j] = (j <= p) ? T[p] : (sauchar_t)A[p]; + A[j] = c; + } else { + *idx = i; + } + } else { + U[0] = T[n - 1]; + for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; } + *idx = i + 1; + for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; } + } + + if(SA == NULL) { + /* Deallocate memory. */ + free(A); + } + + return 0; +} + +/* Inverse Burrows-Wheeler transform. */ +saint_t +inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A, + saidx_t n, saidx_t idx) { + saidx_t C[ALPHABET_SIZE]; + sauchar_t D[ALPHABET_SIZE]; + saidx_t *B; + saidx_t i, p; + saint_t c, d; + + /* Check arguments. */ + if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) || + (n < idx) || ((0 < n) && (idx == 0))) { + return -1; + } + if(n <= 1) { return 0; } + + if((B = A) == NULL) { + /* Allocate n*sizeof(saidx_t) bytes of memory. */ + if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; } + } + + /* Inverse BW transform. */ + for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) { + p = C[c]; + if(0 < p) { + C[c] = i; + D[d++] = (sauchar_t)c; + i += p; + } + } + for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; } + for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; } + for(c = 0; c < d; ++c) { C[c] = C[D[c]]; } + for(i = 0, p = idx; i < n; ++i) { + U[i] = D[binarysearch_lower(C, d, p)]; + p = B[p - 1]; + } + + if(A == NULL) { + /* Deallocate memory. */ + free(B); + } + + return 0; +} + +/* Checks the suffix array SA of the string T. */ +saint_t +sufcheck(const sauchar_t *T, const saidx_t *SA, + saidx_t n, saint_t verbose) { + saidx_t C[ALPHABET_SIZE]; + saidx_t i, p, q, t; + saint_t c; + + if(verbose) { fprintf(stderr, "sufcheck: "); } + + /* Check arguments. */ + if((T == NULL) || (SA == NULL) || (n < 0)) { + if(verbose) { fprintf(stderr, "Invalid arguments.\n"); } + return -1; + } + if(n == 0) { + if(verbose) { fprintf(stderr, "Done.\n"); } + return 0; + } + + /* check range: [0..n-1] */ + for(i = 0; i < n; ++i) { + if((SA[i] < 0) || (n <= SA[i])) { + if(verbose) { + fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + n - 1, i, SA[i]); + } + return -2; + } + } + + /* check first characters. */ + for(i = 1; i < n; ++i) { + if(T[SA[i - 1]] > T[SA[i]]) { + if(verbose) { + fprintf(stderr, "Suffixes in wrong order.\n" + " T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d" + " > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n", + i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]); + } + return -3; + } + } + + /* check suffixes. */ + for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; } + for(i = 0; i < n; ++i) { ++C[T[i]]; } + for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) { + t = C[i]; + C[i] = p; + p += t; + } + + q = C[T[n - 1]]; + C[T[n - 1]] += 1; + for(i = 0; i < n; ++i) { + p = SA[i]; + if(0 < p) { + c = T[--p]; + t = C[c]; + } else { + c = T[p = n - 1]; + t = q; + } + if((t < 0) || (p != SA[t])) { + if(verbose) { + fprintf(stderr, "Suffix in wrong position.\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n" + " SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n", + t, (0 <= t) ? SA[t] : -1, i, SA[i]); + } + return -4; + } + if(t != q) { + ++C[c]; + if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; } + } + } + + if(1 <= verbose) { fprintf(stderr, "Done.\n"); } + return 0; +} + + +static +int +_compare(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + saidx_t suf, saidx_t *match) { + saidx_t i, j; + saint_t r; + for(i = suf + *match, j = *match, r = 0; + (i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { } + *match = j; + return (r == 0) ? -(j != Psize) : r; +} + +/* Search for the pattern P in the string T. */ +saidx_t +sa_search(const sauchar_t *T, saidx_t Tsize, + const sauchar_t *P, saidx_t Psize, + const saidx_t *SA, saidx_t SAsize, + saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t match, lmatch, rmatch; + saidx_t llmatch, lrmatch, rlmatch, rrmatch; + saidx_t i, j, k; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (P == NULL) || (SA == NULL) || + (Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; } + + for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + match = MIN(lmatch, rmatch); + r = _compare(T, Tsize, P, Psize, SA[i + half], &match); + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + lmatch = match; + } else if(r > 0) { + rmatch = match; + } else { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(llmatch = lmatch, lrmatch = match, half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + lmatch = MIN(llmatch, lrmatch); + r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch); + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + llmatch = lmatch; + } else { + lrmatch = lmatch; + } + } + + /* right part */ + for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + rmatch = MIN(rlmatch, rrmatch); + r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch); + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + rlmatch = rmatch; + } else { + rrmatch = rmatch; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} + +/* Search for the character c in the string T. */ +saidx_t +sa_simplesearch(const sauchar_t *T, saidx_t Tsize, + const saidx_t *SA, saidx_t SAsize, + saint_t c, saidx_t *idx) { + saidx_t size, lsize, rsize, half; + saidx_t i, j, k, p; + saint_t r; + + if(idx != NULL) { *idx = -1; } + if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; } + if((Tsize == 0) || (SAsize == 0)) { return 0; } + + for(i = j = k = 0, size = SAsize, half = size >> 1; + 0 < size; + size = half, half >>= 1) { + p = SA[i + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + i += half + 1; + half -= (size & 1) ^ 1; + } else if(r == 0) { + lsize = half, j = i, rsize = size - half - 1, k = i + half + 1; + + /* left part */ + for(half = lsize >> 1; + 0 < lsize; + lsize = half, half >>= 1) { + p = SA[j + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r < 0) { + j += half + 1; + half -= (lsize & 1) ^ 1; + } + } + + /* right part */ + for(half = rsize >> 1; + 0 < rsize; + rsize = half, half >>= 1) { + p = SA[k + half]; + r = (p < Tsize) ? T[p] - c : -1; + if(r <= 0) { + k += half + 1; + half -= (rsize & 1) ^ 1; + } + } + + break; + } + } + + if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; } + return k - j; +} +#endif diff --git a/src/lzsa.c b/src/lzsa.c index c90193e..f9f6543 100755 --- a/src/lzsa.c +++ b/src/lzsa.c @@ -772,7 +772,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam for (i = 0; i < 50; i++) { long long t0 = do_get_time(); if (nOptions & OPT_RAW) - nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize); + nActualDecompressedSize = lzsa_decompressor_expand_block(pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize, nFormatVersion); else nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion); long long t1 = do_get_time(); diff --git a/src/matchfinder.h b/src/matchfinder.h index 09f24bc..630b6a3 100644 --- a/src/matchfinder.h +++ b/src/matchfinder.h @@ -33,6 +33,10 @@ #ifndef _MATCHFINDER_H #define _MATCHFINDER_H +#ifdef __cplusplus +extern "C" { +#endif + /* Forward declarations */ typedef struct _lzsa_match lzsa_match; typedef struct _lzsa_compressor lzsa_compressor; @@ -79,4 +83,8 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con */ void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset); +#ifdef __cplusplus +} +#endif + #endif /* _MATCHFINDER_H */ diff --git a/src/shrink_block_v1.h b/src/shrink_block_v1.h index 0233abd..bacb43a 100644 --- a/src/shrink_block_v1.h +++ b/src/shrink_block_v1.h @@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor; * * @return size of compressed data in output buffer, or -1 if the data is uncompressible */ -int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); +int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize); #endif /* _SHRINK_BLOCK_V1_H */ diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index ca98651..e392e3d 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -566,7 +566,6 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char int nOutOffset = 0; int nCurNibbleOffset = -1, nCurFreeNibbles = 0; int nRepMatchOffset = 0; - lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt; for (i = nStartOffset; i < nEndOffset; ) { lzsa_match *pMatch = pCompressor->best_match + i; @@ -586,15 +585,15 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char } else { if (nMatchOffset <= 32) { - nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1); + nTokenOffsetMode = 0x00 | ((((-nMatchOffset) & 0x01) << 5) ^ 0x20); nOffsetSize = 4; } else if (nMatchOffset <= 512) { - nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3); + nTokenOffsetMode = 0x40 | ((((-nMatchOffset) & 0x100) >> 3) ^ 0x20); nOffsetSize = 8; } else if (nMatchOffset <= (8192 + 512)) { - nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7); + nTokenOffsetMode = 0x80 | ((((-(nMatchOffset - 512)) & 0x0100) >> 3) ^ 0x20); nOffsetSize = 12; } else { @@ -621,14 +620,14 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char } if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) { - nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f); + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-nMatchOffset) & 0x1e) >> 1); if (nOutOffset < 0) return -1; } else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) { pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; } else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) { - nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f); + nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 9) & 0x0f); if (nOutOffset < 0) return -1; pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff; } diff --git a/src/shrink_block_v2.h b/src/shrink_block_v2.h index 4a50ea1..6956199 100644 --- a/src/shrink_block_v2.h +++ b/src/shrink_block_v2.h @@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor; * * @return size of compressed data in output buffer, or -1 if the data is uncompressible */ -int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); +int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize); #endif /* _SHRINK_BLOCK_V2_H */ diff --git a/src/shrink_context.h b/src/shrink_context.h index 848e01f..fa24e60 100644 --- a/src/shrink_context.h +++ b/src/shrink_context.h @@ -35,6 +35,10 @@ #include "divsufsort.h" +#ifdef __cplusplus +extern "C" { +#endif + #define LCP_BITS 15 #define LCP_MAX (1U<<(LCP_BITS - 1)) #define LCP_SHIFT (32-LCP_BITS) @@ -120,4 +124,8 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned ch */ int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor); +#ifdef __cplusplus +} +#endif + #endif /* _SHRINK_CONTEXT_H */ diff --git a/src/shrink_inmem.c b/src/shrink_inmem.c index 51aa671..f456b4f 100644 --- a/src/shrink_inmem.c +++ b/src/shrink_inmem.c @@ -41,8 +41,7 @@ /** * Get maximum compressed size of input(source) data * - * @param pFileData pointer to input(source) data - * @param nFileSize input(source) size in bytes + * @param nInputSize input(source) size in bytes * * @return maximum compressed size */ diff --git a/src/shrink_inmem.h b/src/shrink_inmem.h index d8b9bd3..2c9c89f 100644 --- a/src/shrink_inmem.h +++ b/src/shrink_inmem.h @@ -35,11 +35,14 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + /** * Get maximum compressed size of input(source) data * - * @param pFileData pointer to input(source) data - * @param nFileSize input(source) size in bytes + * @param nInputSize input(source) size in bytes * * @return maximum compressed size */ @@ -61,4 +64,8 @@ size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize); size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion); +#ifdef __cplusplus +} +#endif + #endif /* _SHRINK_INMEM_H */ diff --git a/src/shrink_streaming.c b/src/shrink_streaming.c index 9b5d4fd..b09ab13 100644 --- a/src/shrink_streaming.c +++ b/src/shrink_streaming.c @@ -1,5 +1,5 @@ /* - * shrink_streaming.h - streaming compression definitions + * shrink_streaming.c - streaming compression implementation * * Copyright (C) 2019 Emmanuel Marty * @@ -111,7 +111,6 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) { unsigned char *pInData, *pOutData; lzsa_compressor compressor; - long long nStartTime = 0LL, nEndTime = 0LL; long long nOriginalSize = 0LL, nCompressedSize = 0LL; int nResult; unsigned char cFrameData[16]; diff --git a/src/shrink_streaming.h b/src/shrink_streaming.h index 9acc46d..d962a1a 100644 --- a/src/shrink_streaming.h +++ b/src/shrink_streaming.h @@ -35,6 +35,10 @@ #include "stream.h" +#ifdef __cplusplus +extern "C" { +#endif + /* Forward declaration */ typedef enum _lzsa_status_t lzsa_status_t; @@ -83,4 +87,8 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount); +#ifdef __cplusplus +} +#endif + #endif /* _SHRINK_STREAMING_H */ diff --git a/src/stream.h b/src/stream.h index fb305f7..a8b7922 100644 --- a/src/stream.h +++ b/src/stream.h @@ -33,6 +33,10 @@ #ifndef _STREAM_H #define _STREAM_H +#ifdef __cplusplus +extern "C" { +#endif + /* Forward declaration */ typedef struct _lzsa_stream_t lzsa_stream_t; @@ -92,4 +96,8 @@ typedef struct _lzsa_stream_t { */ int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode); +#ifdef __cplusplus +} +#endif + #endif /* _STREAM_H */