Further update LZSA2 format; avoid name conflicts

This commit is contained in:
Emmanuel Marty 2019-06-08 13:35:03 +02:00 committed by GitHub
parent 272f2e7a29
commit 79ed7bf91e
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
31 changed files with 554 additions and 78 deletions

View File

@ -1,5 +1,5 @@
CC=clang
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc -DHAVE_CONFIG_H
CFLAGS=-O3 -fomit-frame-pointer -Isrc/libdivsufsort/include -Isrc
OBJDIR=obj
LDFLAGS=
STRIP=strip
@ -26,9 +26,9 @@ OBJS += $(OBJDIR)/src/shrink_inmem.o
OBJS += $(OBJDIR)/src/shrink_streaming.o
OBJS += $(OBJDIR)/src/stream.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/divsufsort_utils.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/sssort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/trsort.o
OBJS += $(OBJDIR)/src/libdivsufsort/lib/utils.o
all: $(APP)

View File

@ -50,7 +50,7 @@ DECODE_TOKEN
BNE EMBEDDED_LITERALS ; if less, count is directly embedded in token
JSR GETNIBBLE ; get extra literals length nibble
CLC ; add nibble to len from token
; add nibble to len from token
ADC #$03 ; (LITERALS_RUN_LEN_V2)
CMP #$12 ; LITERALS_RUN_LEN_V2 + 15 ?
BNE PREPARE_COPY_LITERALS ; if less, literals count is complete
@ -92,24 +92,20 @@ NO_LITERALS
BMI OFFSET_9_BIT
; 00Z: 5 bit offset
LSR A ; Shift Z (offset bit 4) in place
LSR A
AND #$10
STA FIXUP
LDA #$0FF ; set offset bits 15-8 to 1
STA OFFSHI
LDX #$0FF ; set offset bits 15-8 to 1
STX OFFSHI
JSR GETNIBBLE ; get nibble for offset bits 0-3
ORA FIXUP ; merge offset bit 4
ORA #$E0 ; set offset bits 7-5 to 1
JMP GOT_OFFSET_LO ; go store low byte of match offset and prepare match
JSR GETCOMBINEDBITS ; rotate Z bit into bit 0, read nibble for bits 4-1
ORA #$E0 ; set bits 7-5 to 1
BNE GOT_OFFSET_LO ; go store low byte of match offset and prepare match
OFFSET_9_BIT ; 01Z: 9 bit offset
ASL ; shift Z (offset bit 8) in place
ROL
ROL
ORA #$FE ; set offset bits 15-9 to 1
AND #$01
EOR #$FF ; set offset bits 15-9 to 1
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
@ -119,15 +115,8 @@ REPMATCH_OR_LARGE_OFFSET
; 10Z: 13 bit offset
LSR A ; shift Z (offset bit 4) in place
LSR A
AND #$10
STA FIXUP
JSR GETNIBBLE ; get nibble for offset bits 8-11
ORA FIXUP ; merge offset bit 12
CLC
ADC #$DE ; set bits 13-15 to 1 and subtract 2 (to subtract 512)
JSR GETCOMBINEDBITS ; rotate Z bit into bit 8, read nibble for bits 12-9
ADC #$DE ; set bits 15-13 to 1 and subtract 2 (to subtract 512)
BNE GOT_OFFSET_HI ; go store high byte, read low byte of match offset and prepare match
; (*same as JMP GOT_OFFSET_HI but shorter)
@ -160,7 +149,7 @@ REP_MATCH
BNE PREPARE_COPY_MATCH ; if less, length is directly embedded in token
JSR GETNIBBLE ; get extra match length nibble
CLC ; add nibble to len from token
; add nibble to len from token
ADC #$09 ; (MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2)
CMP #$18 ; MIN_MATCH_SIZE_V2 + MATCH_RUN_LEN_V2 + 15?
BNE PREPARE_COPY_MATCH ; if less, match length is complete
@ -194,23 +183,35 @@ GETMATCH_DONE
BNE COPY_MATCH_LOOP
JMP DECODE_TOKEN
GETNIBBLE
DEC NIBCOUNT
BPL HAS_NIBBLES
GETCOMBINEDBITS
STA FIXUP
LDA #$01
STA NIBCOUNT
JSR GETNIBBLE ; get nibble into bits 0-3 (for offset bits 1-4)
BIT FIXUP ; merge Z bit as the carry bit (for offset bit 0)
BVS COMBINEDBITZ
SEC
COMBINEDBITZ
ROL ; nibble -> bits 1-4; carry(!Z bit) -> bit 0 ; carry cleared
RTS
GETNIBBLE
LSR NIBCOUNT
BCS HAS_NIBBLES
INC NIBCOUNT
JSR GETSRC ; get 2 nibbles
STA NIBBLES
LSR A
LSR A
LSR A
LSR A
CLC
RTS
HAS_NIBBLES
LDA NIBBLES
AND #$0F ; isolate low 4 bits of nibble
CLC
RTS
GETPUT

View File

@ -75,10 +75,11 @@ lzsa2_decompress:
xchg ax,cx ; clear ah - cx is zero from the rep movsb above
mov al,020H ; shift Z (offset bit 4) in place
and al,dl
shr al,1
shl al,2
call .get_nibble ; get nibble for offset bits 0-3
or al,cl ; merge nibble
or al,0E0H ; set offset bits 7-5 to 1
rol al,1
xor al,0E1H ; set offset bits 7-5 to 1
dec ah ; set offset bits 15-8 to 1
jmp short .get_match_length
@ -87,7 +88,7 @@ lzsa2_decompress:
lodsb ; get 8 bit offset from stream in A
dec ah ; set offset bits 15-8 to 1
test dl,020H ; test bit Z (offset bit 8)
jne .get_match_length
je .get_match_length
dec ah ; clear bit 8 if Z bit is clear
jmp short .get_match_length
@ -99,10 +100,11 @@ lzsa2_decompress:
mov ah,020H ; shift Z (offset bit 12) in place
and ah,dl
shr ah,1
shl ah,2
call .get_nibble ; get nibble for offset bits 8-11
or ah,cl ; merge nibble
or ah,0E0H ; set offset bits 15-13 to 1
rol ah,1
xor ah,0E1H ; set offset bits 15-13 to 1
sub ah,2 ; subtract 512
lodsb ; load match offset bits 0-7
jmp short .get_match_length

View File

@ -39,8 +39,8 @@
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
@ -91,7 +91,7 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
/**
* Free dictionary contents
*
* @param pDictionaryData pointer to pointer to dictionary contents
* @param ppDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData) {
if (*ppDictionaryData) {

View File

@ -35,12 +35,16 @@
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Load dictionary contents
*
* @param pszDictionaryFilename name of dictionary file, or NULL for none
* @param pDictionaryData pointer to returned dictionary contents, or NULL for none
* @param nDictionaryDataSize pointer to returned size of dictionary contents, or 0
* @param ppDictionaryData pointer to returned dictionary contents, or NULL for none
* @param pDictionaryDataSize pointer to returned size of dictionary contents, or 0
*
* @return LZSA_OK for success, or an error value from lzsa_status_t
*/
@ -49,8 +53,12 @@ int lzsa_dictionary_load(const char *pszDictionaryFilename, void **ppDictionaryD
/**
* Free dictionary contents
*
* @param pDictionaryData pointer to pointer to dictionary contents
* @param ppDictionaryData pointer to pointer to dictionary contents
*/
void lzsa_dictionary_free(void **ppDictionaryData);
#ifdef __cplusplus
}
#endif
#endif /* _DICTIONARY_H */

View File

@ -113,7 +113,7 @@ static inline FORCE_INLINE int lzsa_build_match_len_v1(const unsigned char **ppI
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes

View File

@ -37,7 +37,7 @@
* Decompress one LZSA1 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes

View File

@ -99,7 +99,7 @@ static inline FORCE_INLINE int lzsa_build_len_v2(const unsigned char **ppInBlock
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
@ -152,9 +152,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
/* 5 bit offset */
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = nValue;
nMatchOffset |= ((token & 0x20) >> 1);
nMatchOffset ^= 0x1f;
nMatchOffset = nValue << 1;
nMatchOffset |= ((token & 0x20) >> 5);
nMatchOffset ^= 0x1e;
nMatchOffset++;
break;
@ -162,7 +162,7 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
/* 9 bit offset */
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset ^= 0x1ff;
nMatchOffset ^= 0x0ff;
nMatchOffset++;
break;
@ -171,9 +171,9 @@ int lzsa_decompressor_expand_block_v2(const unsigned char *pInBlock, int nBlockS
if (lzsa_get_nibble_v2(&pInBlock, pInBlockEnd, &nCurNibbles, &nibbles, &nValue))
return -1;
nMatchOffset = (unsigned int)(*pInBlock++);
nMatchOffset |= (nValue << 8);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 7);
nMatchOffset ^= 0x1fff;
nMatchOffset |= (nValue << 9);
nMatchOffset |= (((unsigned int)(token & 0x20)) << 3);
nMatchOffset ^= 0x1eff;
nMatchOffset += (512 + 1);
break;

View File

@ -37,7 +37,7 @@
* Decompress one LZSA2 data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes

View File

@ -40,14 +40,15 @@
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize) {
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion) {
if (nFormatVersion == 1)
return lzsa_decompressor_expand_block_v1(pInBlock, nBlockSize, pOutData, nOutDataOffset, nBlockMaxSize);
else if (nFormatVersion == 2)

View File

@ -35,17 +35,26 @@
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Decompress one data block
*
* @param pInBlock pointer to compressed data
* @param nInBlockSize size of compressed data, in bytes
* @param nBlockSize size of compressed data, in bytes
* @param pOutData pointer to output decompression buffer (previously decompressed bytes + room for decompressing this block)
* @param nOutDataOffset starting index of where to store decompressed bytes in output buffer (and size of previously decompressed bytes)
* @param nBlockMaxSize total size of output decompression buffer, in bytes
* @param nFormatVersion version of format to use (1-2)
*
* @return size of decompressed data in bytes, or -1 for error
*/
int lzsa_decompressor_expand_block(const int nFormatVersion, const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize);
int lzsa_decompressor_expand_block(const unsigned char *pInBlock, int nBlockSize, unsigned char *pOutData, int nOutDataOffset, int nBlockMaxSize, const int nFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_CONTEXT_H */

View File

@ -135,7 +135,7 @@ size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOut
if ((pCurFileData + nBlockDataSize) > pEndFileData)
return -1;
nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize));
nDecompressedSize = lzsa_decompressor_expand_block(pCurFileData, nBlockDataSize, pCurOutBuffer - nPreviousBlockSize, nPreviousBlockSize, (int)(pEndOutBuffer - pCurOutBuffer + nPreviousBlockSize), nFormatVersion);
if (nDecompressedSize < 0)
return -1;

View File

@ -35,6 +35,10 @@
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get maximum decompressed size of compressed data
*
@ -58,4 +62,8 @@ size_t lzsa_get_max_decompressed_size_inmem(const unsigned char *pFileData, size
*/
size_t lzsa_decompress_inmem(const unsigned char *pFileData, unsigned char *pOutBuffer, size_t nFileSize, size_t nMaxOutBufferSize, int *pFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_INMEM_H */

View File

@ -103,7 +103,6 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut
*/
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
unsigned char cFrameData[16];
unsigned char *pInBlock;
@ -201,9 +200,7 @@ lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pO
nDecompressedSize = nBlockSize;
}
else {
unsigned int nBlockOffs = 0;
nDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE);
nDecompressedSize = lzsa_decompressor_expand_block(pInBlock, nBlockSize, pOutData, BLOCK_SIZE, BLOCK_SIZE, nFormatVersion);
if (nDecompressedSize < 0) {
nDecompressionError = LZSA_ERROR_DECOMPRESSION;
break;

View File

@ -35,6 +35,10 @@
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
@ -75,4 +79,8 @@ lzsa_status_t lzsa_decompress_file(const char *pszInFilename, const char *pszOut
lzsa_status_t lzsa_decompress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, int nFormatVersion,
long long *pOriginalSize, long long *pCompressedSize);
#ifdef __cplusplus
}
#endif
#endif /* _EXPAND_STREAMING_H */

View File

@ -33,6 +33,10 @@
#ifndef _FRAME_H
#define _FRAME_H
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get compressed file header size
*
@ -111,4 +115,8 @@ int lzsa_decode_header(const unsigned char *pFrameData, const int nFrameDataSize
*/
int lzsa_decode_frame(const unsigned char *pFrameData, const int nFrameDataSize, unsigned int *nBlockSize, int *nIsUncompressed);
#ifdef __cplusplus
}
#endif
#endif /* _FRAME_H */

View File

@ -44,6 +44,10 @@
#include "expand_streaming.h"
#include "expand_inmem.h"
#ifdef __cplusplus
extern "C" {
#endif
/** High level status for compression and decompression */
typedef enum _lzsa_status_t {
LZSA_OK = 0, /**< Success */
@ -66,4 +70,8 @@ typedef enum _lzsa_status_t {
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
#ifdef __cplusplus
}
#endif
#endif /* _LIB_H */

View File

@ -0,0 +1,9 @@
/* Fixed build configuration for the bundled libdivsufsort sources. */
/* HAVE_* feature macros: set to 1 so that `#if HAVE_..._H` tests succeed. */
#define HAVE_STRING_H 1
#define HAVE_STDLIB_H 1
#define HAVE_MEMORY_H 1
#define HAVE_STDINT_H 1
/* INLINE expands to the standard `inline` keyword. */
#define INLINE inline
#ifdef _MSC_VER
/* MSVC only: silence warning C4244 (conversion, possible loss of data). */
#pragma warning( disable : 4244 )
#endif /* _MSC_VER */

View File

@ -31,9 +31,7 @@
extern "C" {
#endif /* __cplusplus */
#if HAVE_CONFIG_H
# include "config.h"
#endif
#include "divsufsort_config.h"
#include <assert.h>
#include <stdio.h>
#if HAVE_STRING_H

View File

@ -0,0 +1,383 @@
/*
* utils.c for libdivsufsort
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
*
* Permission is hereby granted, free of charge, to any person
* obtaining a copy of this software and associated documentation
* files (the "Software"), to deal in the Software without
* restriction, including without limitation the rights to use,
* copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following
* conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
* OTHER DEALINGS IN THE SOFTWARE.
*/
#include "divsufsort_private.h"
/*- Private Function -*/
#if 0
/*
 * Binary search for inverse bwt.
 *
 * Searches the first `size` entries of A and returns the index of the first
 * entry that is not less than `value` (or `size` when every entry is less).
 * Assumes A is sorted in ascending order -- TODO confirm against callers.
 */
static
saidx_t
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
  saidx_t lo = 0;           /* start of the window still being searched */
  saidx_t mid = size >> 1;  /* offset of the probe inside that window */

  while(size > 0) {
    if(A[lo + mid] < value) {
      /* Answer lies strictly right of the probe: drop the left part. */
      lo += mid + 1;
      /* For an even-sized window the right part is one entry shorter. */
      mid -= (size & 1) ^ 1;
    }
    size = mid;
    mid >>= 1;
  }
  return lo;
}
/*- Functions -*/
/*
 * Burrows-Wheeler transform.
 *
 * Writes the transform of the n bytes at T into U and stores the primary
 * index in *idx.
 *
 * T   - input text (must not be NULL).
 * U   - output buffer of n bytes (must not be NULL); may be the same
 *       pointer as T, in which case the transform is done in place.
 * SA  - optional work array. When NULL the work is delegated to divbwt().
 *       When non-NULL its entries are used as positions into T (presumably
 *       the suffix array of T -- confirm with the caller); in the in-place
 *       case (T == U) the array is overwritten.
 * n   - length of T and U.
 * idx - receives the primary index (must not be NULL).
 *
 * Returns 0 on success, -1 on invalid arguments, or the negative value
 * returned by divbwt() when SA is NULL and divbwt() fails.
 */
saint_t
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
             saidx_t n, saidx_t *idx) {
  saidx_t *A, i, j, p, t;
  saint_t c;

  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
  if(n <= 1) {
    if(n == 1) { U[0] = T[0]; }
    *idx = n;
    return 0;
  }

  /* No work array supplied: let divbwt() do everything and return early.
     Nothing is allocated here, so there is nothing to free afterwards. */
  if((A = SA) == NULL) {
    i = divbwt(T, U, NULL, n);
    if(0 <= i) { *idx = i; i = 0; }
    return (saint_t)i;
  }

  /* BW transform. */
  if(T == U) {
    /* In place: bytes of T that are about to be overwritten are parked in
       A[j], and read back from A[p] once position p has been overwritten. */
    t = n;
    for(i = 0, j = 0; i < n; ++i) {
      p = t - 1;
      t = A[i];
      if(0 <= p) {
        c = T[j];
        U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
        A[j] = c;
        j++;
      } else {
        *idx = i;
      }
    }
    /* Handle the last pending entry left over from the loop. */
    p = t - 1;
    if(0 <= p) {
      c = T[j];
      U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
      A[j] = c;
    } else {
      *idx = i;
    }
  } else {
    /* Separate buffers: U[0] is the last byte of T, then every entry emits
       the byte preceding its position; the entry equal to 0 is skipped and
       marks the primary index. */
    U[0] = T[n - 1];
    for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
    *idx = i + 1;
    for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
  }

  return 0;
}
/*
 * Inverse Burrows-Wheeler transform.
 *
 * Reconstructs n bytes into U from the transformed bytes at T and the
 * primary index idx.
 *
 * T   - transformed input (must not be NULL).
 * U   - output buffer of n bytes (must not be NULL).
 * A   - optional work array of n entries; when NULL a temporary array is
 *       allocated with malloc() and released before returning.
 * n   - length of T and U.
 * idx - primary index; must satisfy 0 <= idx <= n, and idx != 0 when n > 0.
 *
 * Returns 0 on success, -1 on invalid arguments, -2 if the temporary work
 * array cannot be allocated.
 */
saint_t
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
                     saidx_t n, saidx_t idx) {
  saidx_t C[ALPHABET_SIZE];
  sauchar_t D[ALPHABET_SIZE];
  saidx_t *B;
  saidx_t i, p;
  saint_t c, d;

  /* Check arguments. */
  if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
     (n < idx) || ((0 < n) && (idx == 0))) {
    return -1;
  }
  if(n <= 1) {
    /* A single byte is its own inverse transform. Copy it so that U is
       defined even when it does not alias T (bw_transform does the same
       for n == 1). */
    if(n == 1) { U[0] = T[0]; }
    return 0;
  }

  if((B = A) == NULL) {
    /* Allocate n*sizeof(saidx_t) bytes of memory. */
    if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
  }

  /* Inverse BW transform. */
  /* Count occurrences of every byte value. */
  for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
  for(i = 0; i < n; ++i) { ++C[T[i]]; }
  /* Turn counts into start offsets; D collects the d byte values that
     actually occur, in increasing order. */
  for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
    p = C[c];
    if(0 < p) {
      C[c] = i;
      D[d++] = (sauchar_t)c;
      i += p;
    }
  }
  /* Distribute positions into B; positions from idx onwards are stored
     shifted by one. */
  for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
  for( ; i < n; ++i)       { B[C[T[i]]++] = i + 1; }
  /* Compact C so that C[0..d-1] holds the end offsets of the used bytes. */
  for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
  /* Walk the chain starting at idx, emitting one output byte per step. */
  for(i = 0, p = idx; i < n; ++i) {
    U[i] = D[binarysearch_lower(C, d, p)];
    p = B[p - 1];
  }

  if(A == NULL) {
    /* Deallocate memory. */
    free(B);
  }

  return 0;
}
/*
 * Checks the suffix array SA of the string T.
 *
 * T       - text of n bytes.
 * SA      - array of n entries to validate against T.
 * n       - length of T and SA.
 * verbose - when non-zero, progress and diagnostics are written to stderr.
 *
 * Returns 0 when all checks pass (or n == 0), -1 on invalid arguments,
 * -2 when an entry lies outside [0, n-1], -3 when the first bytes of two
 * adjacent suffixes are out of order, -4 when a suffix is found at an
 * unexpected position.
 */
saint_t
sufcheck(const sauchar_t *T, const saidx_t *SA,
saidx_t n, saint_t verbose) {
saidx_t C[ALPHABET_SIZE];
saidx_t i, p, q, t;
saint_t c;
if(verbose) { fprintf(stderr, "sufcheck: "); }
/* Check arguments. */
if((T == NULL) || (SA == NULL) || (n < 0)) {
if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
return -1;
}
if(n == 0) {
if(verbose) { fprintf(stderr, "Done.\n"); }
return 0;
}
/* check range: [0..n-1] */
for(i = 0; i < n; ++i) {
if((SA[i] < 0) || (n <= SA[i])) {
if(verbose) {
fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
n - 1, i, SA[i]);
}
return -2;
}
}
/* check first characters. */
for(i = 1; i < n; ++i) {
if(T[SA[i - 1]] > T[SA[i]]) {
if(verbose) {
fprintf(stderr, "Suffixes in wrong order.\n"
" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
}
return -3;
}
}
/* check suffixes. */
/* Build C[b] = number of bytes in T smaller than b, i.e. the first SA slot
   of the group of suffixes starting with b. */
for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
for(i = 0; i < n; ++i) { ++C[T[i]]; }
for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
t = C[i];
C[i] = p;
p += t;
}
/* q is the slot expected to hold the last suffix (position n-1); that slot
   is then skipped for the other suffixes starting with the same byte. */
q = C[T[n - 1]];
C[T[n - 1]] += 1;
for(i = 0; i < n; ++i) {
p = SA[i];
if(0 < p) {
/* Suffix p-1 is expected at the next free slot of its first byte's group. */
c = T[--p];
t = C[c];
} else {
/* SA[i] == 0: the position checked is n-1, expected at slot q. */
c = T[p = n - 1];
t = q;
}
if((t < 0) || (p != SA[t])) {
if(verbose) {
fprintf(stderr, "Suffix in wrong position.\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
t, (0 <= t) ? SA[t] : -1, i, SA[i]);
}
return -4;
}
if(t != q) {
/* Advance the group's next slot; mark the group exhausted (-1) once the
   slot leaves the array or no longer starts with byte c. */
++C[c];
if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
}
}
/* NOTE(review): this final message requires verbose >= 1, while the other
   messages are printed for any non-zero verbose value. */
if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
return 0;
}
/*
 * Compares pattern P against the suffix of T that starts at `suf`, skipping
 * the first *match bytes (taken as already known to be equal).
 *
 * On return *match holds the number of pattern bytes matched so far.
 * Returns the difference T[i] - P[j] at the first mismatch; otherwise 0 if
 * the whole pattern was matched, or -1 if T ended before the pattern did.
 */
static
int
_compare(const sauchar_t *T, saidx_t Tsize,
         const sauchar_t *P, saidx_t Psize,
         saidx_t suf, saidx_t *match) {
  saidx_t tpos = suf + *match;  /* current position in the text */
  saidx_t ppos = *match;        /* current position in the pattern */
  saint_t diff = 0;

  while((tpos < Tsize) && (ppos < Psize)) {
    diff = T[tpos] - P[ppos];
    if(diff != 0) { break; }
    ++tpos;
    ++ppos;
  }

  *match = ppos;
  if(diff != 0) { return diff; }
  return (ppos != Psize) ? -1 : 0;
}
/*
 * Search for the pattern P in the string T.
 *
 * T, Tsize   - text and its length.
 * P, Psize   - pattern and its length.
 * SA, SAsize - array of text positions to search, and its length
 *              (presumably the suffix array of T -- confirm with the caller).
 * idx        - optional; receives the index in SA of the first matching
 *              entry, or the final search position when nothing matched,
 *              or -1 when the function fails or the text/array is empty.
 *
 * Returns the number of matching SA entries, 0 when Tsize or SAsize is 0,
 * SAsize when the pattern is empty, or -1 on invalid arguments.
 */
saidx_t
sa_search(const sauchar_t *T, saidx_t Tsize,
const sauchar_t *P, saidx_t Psize,
const saidx_t *SA, saidx_t SAsize,
saidx_t *idx) {
saidx_t size, lsize, rsize, half;
saidx_t match, lmatch, rmatch;
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
saidx_t i, j, k;
saint_t r;
if(idx != NULL) { *idx = -1; }
if((T == NULL) || (P == NULL) || (SA == NULL) ||
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
if((Tsize == 0) || (SAsize == 0)) { return 0; }
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
/* Outer binary search for any entry whose suffix matches P. lmatch and
   rmatch are the match lengths recorded at the left and right bounds; the
   smaller of the two is passed to _compare as bytes to skip. */
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
0 < size;
size = half, half >>= 1) {
match = MIN(lmatch, rmatch);
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
if(r < 0) {
i += half + 1;
half -= (size & 1) ^ 1;
lmatch = match;
} else if(r > 0) {
rmatch = match;
} else {
/* Hit at i + half: search the entries on either side of it for the two
   ends of the matching range [j, k). */
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
/* left part */
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
0 < lsize;
lsize = half, half >>= 1) {
lmatch = MIN(llmatch, lrmatch);
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
if(r < 0) {
j += half + 1;
half -= (lsize & 1) ^ 1;
llmatch = lmatch;
} else {
lrmatch = lmatch;
}
}
/* right part */
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
0 < rsize;
rsize = half, half >>= 1) {
rmatch = MIN(rlmatch, rrmatch);
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
if(r <= 0) {
k += half + 1;
half -= (rsize & 1) ^ 1;
rlmatch = rmatch;
} else {
rrmatch = rmatch;
}
}
break;
}
}
/* With no hit j == k == 0, so i (where the search ended) is reported. */
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
return k - j;
}
/*
 * Search for the character c in the string T.
 *
 * T, Tsize   - text and its length.
 * SA, SAsize - array of text positions to search, and its length
 *              (presumably the suffix array of T -- confirm with the caller).
 * c          - byte value to look for as the first byte of a suffix.
 * idx        - optional; receives the index in SA of the first matching
 *              entry, or the final search position when nothing matched,
 *              or -1 when the function fails or the text/array is empty.
 *
 * Returns the number of matching SA entries, 0 when Tsize or SAsize is 0,
 * or -1 on invalid arguments.
 */
saidx_t
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
const saidx_t *SA, saidx_t SAsize,
saint_t c, saidx_t *idx) {
saidx_t size, lsize, rsize, half;
saidx_t i, j, k, p;
saint_t r;
if(idx != NULL) { *idx = -1; }
if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
if((Tsize == 0) || (SAsize == 0)) { return 0; }
/* Outer binary search for any entry whose first byte equals c. */
for(i = j = k = 0, size = SAsize, half = size >> 1;
0 < size;
size = half, half >>= 1) {
p = SA[i + half];
/* A position at or past Tsize is treated as comparing below c. */
r = (p < Tsize) ? T[p] - c : -1;
if(r < 0) {
i += half + 1;
half -= (size & 1) ^ 1;
} else if(r == 0) {
/* Hit at i + half: search the entries on either side of it for the two
   ends of the matching range [j, k). */
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
/* left part */
for(half = lsize >> 1;
0 < lsize;
lsize = half, half >>= 1) {
p = SA[j + half];
r = (p < Tsize) ? T[p] - c : -1;
if(r < 0) {
j += half + 1;
half -= (lsize & 1) ^ 1;
}
}
/* right part */
for(half = rsize >> 1;
0 < rsize;
rsize = half, half >>= 1) {
p = SA[k + half];
r = (p < Tsize) ? T[p] - c : -1;
if(r <= 0) {
k += half + 1;
half -= (rsize & 1) ^ 1;
}
}
break;
}
}
/* With no hit j == k == 0, so i (where the search ended) is reported. */
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
return k - j;
}
#endif

View File

@ -772,7 +772,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
for (i = 0; i < 50; i++) {
long long t0 = do_get_time();
if (nOptions & OPT_RAW)
nActualDecompressedSize = lzsa_decompressor_expand_block(nFormatVersion, pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize);
nActualDecompressedSize = lzsa_decompressor_expand_block(pFileData, (int)nFileSize - 4 /* EOD marker */, pDecompressedData, 0, (int)nMaxDecompressedSize, nFormatVersion);
else
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, &nFormatVersion);
long long t1 = do_get_time();

View File

@ -33,6 +33,10 @@
#ifndef _MATCHFINDER_H
#define _MATCHFINDER_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
typedef struct _lzsa_compressor lzsa_compressor;
@ -79,4 +83,8 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con
*/
void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nStartOffset, const int nEndOffset);
#ifdef __cplusplus
}
#endif
#endif /* _MATCHFINDER_H */

View File

@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V1_H */

View File

@ -566,7 +566,6 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
int nOutOffset = 0;
int nCurNibbleOffset = -1, nCurFreeNibbles = 0;
int nRepMatchOffset = 0;
lzsa_repmatch_opt *repmatch_opt = pCompressor->repmatch_opt;
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pCompressor->best_match + i;
@ -586,15 +585,15 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
}
else {
if (nMatchOffset <= 32) {
nTokenOffsetMode = 0x00 | (((-nMatchOffset) & 0x10) << 1);
nTokenOffsetMode = 0x00 | ((((-nMatchOffset) & 0x01) << 5) ^ 0x20);
nOffsetSize = 4;
}
else if (nMatchOffset <= 512) {
nTokenOffsetMode = 0x40 | (((-nMatchOffset) & 0x100) >> 3);
nTokenOffsetMode = 0x40 | ((((-nMatchOffset) & 0x100) >> 3) ^ 0x20);
nOffsetSize = 8;
}
else if (nMatchOffset <= (8192 + 512)) {
nTokenOffsetMode = 0x80 | (((-(nMatchOffset - 512)) & 0x1000) >> 7);
nTokenOffsetMode = 0x80 | ((((-(nMatchOffset - 512)) & 0x0100) >> 3) ^ 0x20);
nOffsetSize = 12;
}
else {
@ -621,14 +620,14 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const unsigned char
}
if (nTokenOffsetMode == 0x00 || nTokenOffsetMode == 0x20) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, (-nMatchOffset) & 0x0f);
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-nMatchOffset) & 0x1e) >> 1);
if (nOutOffset < 0) return -1;
}
else if (nTokenOffsetMode == 0x40 || nTokenOffsetMode == 0x60) {
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
}
else if (nTokenOffsetMode == 0x80 || nTokenOffsetMode == 0xa0) {
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 8) & 0x0f);
nOutOffset = lzsa_write_nibble_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, ((-(nMatchOffset - 512)) >> 9) & 0x0f);
if (nOutOffset < 0) return -1;
pOutData[nOutOffset++] = (-(nMatchOffset - 512)) & 0xff;
}

View File

@ -48,6 +48,6 @@ typedef struct _lzsa_compressor lzsa_compressor;
*
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize);
#endif /* _SHRINK_BLOCK_V2_H */

View File

@ -35,6 +35,10 @@
#include "divsufsort.h"
#ifdef __cplusplus
extern "C" {
#endif
#define LCP_BITS 15
#define LCP_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
@ -120,4 +124,8 @@ int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, const unsigned ch
*/
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_CONTEXT_H */

View File

@ -41,8 +41,7 @@
/**
* Get maximum compressed size of input(source) data
*
* @param pFileData pointer to input(source) data
* @param nFileSize input(source) size in bytes
* @param nInputSize input(source) size in bytes
*
* @return maximum compressed size
*/

View File

@ -35,11 +35,14 @@
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/**
* Get maximum compressed size of input(source) data
*
* @param pFileData pointer to input(source) data
* @param nFileSize input(source) size in bytes
* @param nInputSize input(source) size in bytes
*
* @return maximum compressed size
*/
@ -61,4 +64,8 @@ size_t lzsa_get_max_compressed_size_inmem(size_t nInputSize);
size_t lzsa_compress_inmem(const unsigned char *pInputData, unsigned char *pOutBuffer, size_t nInputSize, size_t nMaxOutBufferSize,
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_INMEM_H */

View File

@ -1,5 +1,5 @@
/*
* shrink_streaming.h - streaming compression definitions
* shrink_streaming.c - streaming compression implementation
*
* Copyright (C) 2019 Emmanuel Marty
*
@ -111,7 +111,6 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount) {
unsigned char *pInData, *pOutData;
lzsa_compressor compressor;
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nResult;
unsigned char cFrameData[16];

View File

@ -35,6 +35,10 @@
#include "stream.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef enum _lzsa_status_t lzsa_status_t;
@ -83,4 +87,8 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount);
#ifdef __cplusplus
}
#endif
#endif /* _SHRINK_STREAMING_H */

View File

@ -33,6 +33,10 @@
#ifndef _STREAM_H
#define _STREAM_H
#ifdef __cplusplus
extern "C" {
#endif
/* Forward declaration */
typedef struct _lzsa_stream_t lzsa_stream_t;
@ -92,4 +96,8 @@ typedef struct _lzsa_stream_t {
*/
int lzsa_filestream_open(lzsa_stream_t *stream, const char *pszInFilename, const char *pszMode);
#ifdef __cplusplus
}
#endif
#endif /* _STREAM_H */