Add extra safety checks to LZSA2 token reducer

This commit is contained in:
Emmanuel Marty 2019-09-22 20:41:09 +02:00 committed by GitHub
parent 1869d85c1f
commit 81e15d10f0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -667,39 +667,44 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
/* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when /* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
* matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the * matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the
* ratio (the forward arrivals parser already has this covered). */ * ratio (the forward arrivals parser already has this covered). */
if (i >= nRepMatchOffset && !memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length)) if (i >= nRepMatchOffset &&
(i - nRepMatchOffset + pMatch->length) <= (nEndOffset - LAST_LITERALS) &&
!memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length))
pMatch->offset = nRepMatchOffset; pMatch->offset = nRepMatchOffset;
} }
/* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */ if (pMatch->length < 9 /* Don't waste time considering large matches, they will always win over literals */) {
int nCurCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2); /* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
if (pMatch->offset != nRepMatchOffset)
nCurCommandSize += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
/* Calculate the next command's current cost */ int nCurCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
int nNextCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2); if (pMatch->offset != nRepMatchOffset)
if (pBestMatch[nNextIndex].offset != pMatch->offset) nCurCommandSize += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
nNextCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize; /* Calculate the next command's current cost */
int nNextCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
if (pBestMatch[nNextIndex].offset != pMatch->offset)
nNextCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
/* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */ int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
int nReducedCommandSize = (pMatch->length << 3) + 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals + pMatch->length + nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
if (pBestMatch[nNextIndex].offset != nRepMatchOffset)
nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
if (nOriginalCombinedCommandSize >= nReducedCommandSize) { /* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */
/* Reduce */ int nReducedCommandSize = (pMatch->length << 3) + 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals + pMatch->length + nNextLiterals) + (nNextLiterals << 3) + lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
int nMatchLen = pMatch->length; if (pBestMatch[nNextIndex].offset != nRepMatchOffset)
int j; nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
for (j = 0; j < nMatchLen; j++) { if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
pBestMatch[i + j].length = 0; /* Reduce */
int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
pBestMatch[i + j].length = 0;
}
nDidReduce = 1;
continue;
} }
nDidReduce = 1;
continue;
} }
} }
} }