Promote some literal+match sequences to a match

This commit is contained in:
Emmanuel Marty 2019-11-18 12:10:23 +01:00 committed by GitHub
parent e328f63feb
commit b1738b4003
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 49 additions and 3 deletions

View File

@ -512,7 +512,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
size_t nDataSizeStep = 128;
float fProbabilitySizeStep = 0.0005f;
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE)); nGeneratedDataSize += nDataSizeStep) {
for (nGeneratedDataSize = 1024; nGeneratedDataSize <= ((size_t)((nOptions & OPT_RAW) ? BLOCK_SIZE : (4 * BLOCK_SIZE))); nGeneratedDataSize += nDataSizeStep) {
float fMatchProbability;
fprintf(stdout, "size %zd", nGeneratedDataSize);
@ -530,7 +530,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, in
/* Try to compress it, expected to succeed */
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
nFlags, nMinMatchSize, nFormatVersion);
if (nActualCompressedSize == -1 || nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);

View File

@ -301,6 +301,27 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < (nEndOffset - LAST_LITERALS) &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
pBestMatch[i].offset = pBestMatch[i + 1].offset;
pBestMatch[i + 1].length = 0;
pBestMatch[i + 1].offset = 0;
nDidReduce = 1;
continue;
}
}
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
(i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
@ -639,6 +660,7 @@ int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigne
/* Compress optimally without breaking ties in favor of less tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */);
int nDidReduce;
@ -655,6 +677,7 @@ int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigne
int nReducedCompressedSize;
/* Compress optimally and do break ties in favor of less tokens */
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v1(pCompressor, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */);
nPasses = 0;

View File

@ -502,6 +502,27 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
for (i = nStartOffset; i < nEndOffset; ) {
lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length == 0 &&
(i + 1) < (nEndOffset - LAST_LITERALS) &&
pBestMatch[i + 1].length >= MIN_MATCH_SIZE_V2 &&
pBestMatch[i + 1].offset &&
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= (nEndOffset - LAST_LITERALS) &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length);
int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
pBestMatch[i].length = pBestMatch[i + 1].length + 1;
pBestMatch[i].offset = pBestMatch[i + 1].offset;
pBestMatch[i + 1].length = 0;
pBestMatch[i + 1].offset = 0;
nDidReduce = 1;
continue;
}
}
if (pMatch->length >= MIN_MATCH_SIZE_V2) {
if ((i + pMatch->length) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */) {
int nNextIndex = i + pMatch->length;
@ -1040,6 +1061,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
/* Compress optimally without breaking ties in favor of less tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nMatchesPerArrival);
if (nInDataSize < 65536)
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, 1 /* insert forward reps */, nMatchesPerArrival);
@ -1058,6 +1080,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
int nReducedCompressedSize;
/* Compress optimally and do break ties in favor of less tokens */
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, nMatchesPerArrival);
nPasses = 0;

View File

@ -142,7 +142,7 @@ size_t lzsa_compress_inmem(unsigned char *pInputData, unsigned char *pOutBuffer,
if (nBlockheaderSize < 0)
nError = LZSA_ERROR_COMPRESSION;
else {
if (nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
if ((size_t)nInDataSize > (nMaxOutBufferSize - (nCompressedSize + nBlockheaderSize)))
nError = LZSA_ERROR_DST;
else {
memcpy(pOutBuffer + nBlockheaderSize + nCompressedSize, pInputData + nOriginalSize, nInDataSize);