mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-04-05 14:37:05 +00:00
Promote some literal+match sequences to a match
This commit is contained in:
parent
e328f63feb
commit
b1738b4003
@ -512,7 +512,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
|
||||
size_t nDataSizeStep = 128;
|
||||
float fProbabilitySizeStep = 0.0005f;
|
||||
|
||||
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) {
|
||||
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((size_t)((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE))); nGeneratedDataSize += nDataSizeStep) {
|
||||
float fMatchProbability;
|
||||
|
||||
fprintf(stdout, "size %zd", nGeneratedDataSize);
|
||||
@ -530,7 +530,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
|
||||
/* Try to compress it, expected to succeed */
|
||||
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
|
||||
nFlags, nMinMatchSize, nFormatVersion);
|
||||
if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
|
||||
if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
|
||||
free(pTmpDecompressedData);
|
||||
pTmpDecompressedData = NULL;
|
||||
free(pTmpCompressedData);
|
||||
|
@ -301,6 +301,27 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pBestMatch + i;
|
||||
|
||||
if (pMatch->length == 0 &&
|
||||
(i + 1) < (nEndOffset - LAST_LITERALS) &&
|
||||
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
|
||||
pBestMatch[i + 1].offset &&
|
||||
i >= pBestMatch[i + 1].offset &&
|
||||
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
|
||||
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
|
||||
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length);
|
||||
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1);
|
||||
|
||||
if ((nReducedLenSize - nCurLenSize) <= 8) {
|
||||
/* Merge */
|
||||
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
|
||||
pBestMatch[i].offset = pBestMatch[i + 1].offset;
|
||||
pBestMatch[i + 1].length = 0;
|
||||
pBestMatch[i + 1].offset = 0;
|
||||
nDidReduce = 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
|
||||
if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
|
||||
(i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
|
||||
@ -639,6 +660,7 @@ int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigne
|
||||
|
||||
/* Compress optimally without breaking ties in favor of fewer tokens */
|
||||
|
||||
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
||||
lzsa_optimize_forward_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
|
||||
|
||||
int nDidReduce;
|
||||
@ -655,6 +677,7 @@ int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigne
|
||||
int nReducedCompressedSize;
|
||||
|
||||
/* Compress optimally and do break ties in favor of fewer tokens */
|
||||
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
||||
lzsa_optimize_forward_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
|
||||
|
||||
nPasses = 0;
|
||||
|
@ -502,6 +502,27 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
||||
for (i = nStartOffset; i < nEndOffset; ) {
|
||||
lzsa_match *pMatch = pBestMatch + i;
|
||||
|
||||
if (pMatch->length == 0 &&
|
||||
(i + 1) < (nEndOffset - LAST_LITERALS) &&
|
||||
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V2 &&
|
||||
pBestMatch[i + 1].offset &&
|
||||
i >= pBestMatch[i + 1].offset &&
|
||||
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
|
||||
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
|
||||
int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length);
|
||||
int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1);
|
||||
|
||||
if ((nReducedLenSize - nCurLenSize) <= 8) {
|
||||
/* Merge */
|
||||
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
|
||||
pBestMatch[i].offset = pBestMatch[i + 1].offset;
|
||||
pBestMatch[i + 1].length = 0;
|
||||
pBestMatch[i + 1].offset = 0;
|
||||
nDidReduce = 1;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
|
||||
if ((i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) {
|
||||
int nNextIndex = i + pMatch->length;
|
||||
@ -1040,6 +1061,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
|
||||
/* Compress optimally without breaking ties in favor of fewer tokens */
|
||||
|
||||
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nMatchesPerArrival);
|
||||
if (nInDataSize < 65536)
|
||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, 1 /* insert forward reps */, nMatchesPerArrival);
|
||||
@ -1058,6 +1080,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
int nReducedCompressedSize;
|
||||
|
||||
/* Compress optimally and do break ties in favor of fewer tokens */
|
||||
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, nMatchesPerArrival);
|
||||
|
||||
nPasses = 0;
|
||||
|
@ -142,7 +142,7 @@ size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer,
|
||||
if (nBlockheaderSize < 0)
|
||||
nError = LZSA_ERROR_COMPRESSION;
|
||||
else {
|
||||
if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
|
||||
if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
|
||||
nError = LZSA_ERROR_DST;
|
||||
else {
|
||||
memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);
|
||||
|
Loading…
x
Reference in New Issue
Block a user