Simplify LZSA1 token reducer (same binary output)

Emmanuel Marty 2019-09-22 20:34:08 +02:00 committed by GitHub
parent 1a4f662360
commit 1869d85c1f


@@ -388,66 +388,48 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const in
       lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
       if (pMatch->length >= MIN_MATCH_SIZE_V1) {
-         int nMatchLen = pMatch->length;
-         int nReduce = 0;
-
-         if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
-            int nMatchOffset = pMatch->offset;
-            int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
-            int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
-
-            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE_V1) {
-               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nMatchLen))) {
-                  /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
-                   * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
-                   * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
-                   * match command by literals, the output size will not increase and it will remove one command. */
-                  nReduce = 1;
-               }
-            }
-            else {
-               int nCurIndex = i + nMatchLen;
-               int nNextNumLiterals = 0;
-
-               do {
-                  nCurIndex++;
-                  nNextNumLiterals++;
-               } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1);
-
-               if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size_v1(nNextNumLiterals))) {
-                  /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
-                   * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
-                  nReduce = 1;
-               }
-            }
-         }
-
-         if (nReduce) {
-            int j;
-
-            for (j = 0; j < nMatchLen; j++) {
-               pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
-            }
-            nNumLiterals += nMatchLen;
-            i += nMatchLen;
-
-            nDidReduce = 1;
-         }
-         else {
-            if ((i + nMatchLen) < nEndOffset && nMatchLen >= LCP_MAX &&
-               pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (nMatchLen % pMatch->offset) == 0 &&
-               (nMatchLen + pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
-               /* Join */
-
-               pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
-               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
-               pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
-               continue;
-            }
-
-            nNumLiterals = 0;
-            i += nMatchLen;
-         }
+         if (pMatch->length <= 9 /* Don't waste time considering large matches, they will always win over literals */ &&
+            (i + pMatch->length) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */) {
+            int nNextIndex = i + pMatch->length;
+            int nNextLiterals = 0;
+
+            while (nNextIndex < nEndOffset && pCompressor->match[nNextIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE_V1) {
+               nNextLiterals++;
+               nNextIndex++;
+            }
+
+            /* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
+
+            if ((8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + ((pMatch->offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1) +
+               8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
+               (8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
+               /* Reduce */
+               int nMatchLen = pMatch->length;
+               int j;
+
+               for (j = 0; j < nMatchLen; j++) {
+                  pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0;
+               }
+
+               nDidReduce = 1;
+               continue;
+            }
+         }
+
+         if ((i + pMatch->length) < nEndOffset && pMatch->length >= LCP_MAX &&
+            pMatch->offset && pMatch->offset <= 32 && pCompressor->match[(i + pMatch->length) << MATCHES_PER_OFFSET_SHIFT].offset == pMatch->offset && (pMatch->length % pMatch->offset) == 0 &&
+            (pMatch->length + pCompressor->match[(i + pMatch->length) << MATCHES_PER_OFFSET_SHIFT].length) <= MAX_VARLEN) {
+            int nMatchLen = pMatch->length;
+
+            /* Join */
+
+            pMatch->length += pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length;
+            pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].offset = 0;
+            pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length = -1;
+            continue;
+         }
+
+         i += pMatch->length;
+         nNumLiterals = 0;
       }
       else {
         nNumLiterals++;
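
The rewrite makes the same decision as the code it replaces, which is why the binary output is unchanged: when the next command is a match (nNextLiterals = 0), the 8-bit next-token term drops off both sides of the new inequality and it collapses to the old match-followed-by-match test; when literals follow, moving lzsa_get_literals_varlen_size_v1(nNextLiterals) across the comparison recovers the old match-followed-by-literals test. The decision itself can be isolated as in the sketch below. This is a standalone illustration, not code from the repository: can_reduce, literals_varlen_bits and match_varlen_bits are hypothetical names, and the two cost helpers are simplified stand-ins for lzsa_get_literals_varlen_size_v1() and lzsa_get_match_varlen_size_v1(). All costs are counted in bits.

#include <stdio.h>

#define MIN_MATCH_SIZE_V1 3

/* Extra bits, beyond the token, needed to encode a literal run length.
 * Runs of 0-6 fit in the token's 3-bit field; longer runs spend extra
 * bytes (thresholds simplified for illustration). */
static int literals_varlen_bits(int nLength) {
   if (nLength < 7) return 0;
   if (nLength < 256) return 8;
   if (nLength < 512) return 16;
   return 24;
}

/* Extra bits needed to encode a match length; encoded lengths of 0-14
 * fit in the token's 4-bit field (thresholds simplified likewise). */
static int match_varlen_bits(int nEncodedLength) {
   if (nEncodedLength < 15) return 0;
   if ((nEncodedLength + MIN_MATCH_SIZE_V1) < 256) return 8;
   if ((nEncodedLength + MIN_MATCH_SIZE_V1) < 512) return 16;
   return 24;
}

/* Nonzero when a match of nMatchLen bytes at nMatchOffset, preceded by
 * nNumLiterals pending literals and followed by nNextLiterals literals,
 * costs at least as much as folding everything into one literal run. */
static int can_reduce(int nMatchLen, int nMatchOffset,
                      int nNumLiterals, int nNextLiterals) {
   int nKeepCost =
      8 /* this token */ + literals_varlen_bits(nNumLiterals) +
      ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ +
      match_varlen_bits(nMatchLen - MIN_MATCH_SIZE_V1) +
      8 /* next token */ + literals_varlen_bits(nNextLiterals);
   int nReduceCost =
      8 /* single remaining token */ +
      (nMatchLen << 3) /* match bytes re-emitted as literals */ +
      literals_varlen_bits(nNumLiterals + nMatchLen + nNextLiterals);
   return nKeepCost >= nReduceCost;
}

int main(void) {
   /* A 3-byte match with a two-byte offset costs 8 + 16 bits, plus the
    * 8-bit token of the following command: 32 bits. Folded, the 3 bytes
    * cost 24 bits behind a single 8-bit token, and the combined run of
    * 2 + 3 + 1 = 6 literals still fits in the token: also 32 bits, so
    * the reducer removes a whole command for free. */
   printf("reduce: %d\n", can_reduce(3, 1000, 2, 1)); /* 1 */
   /* With a one-byte offset the match only costs 24 bits; keep it. */
   printf("reduce: %d\n", can_reduce(3, 100, 2, 1));  /* 0 */
   return 0;
}

The <= 9 guard both versions share follows from the same arithmetic: per the old code's comment, a reducible match command occupies at most 9 bytes (token, literal length extensions, two offset bytes, match length extensions), while re-emitting the match as literals costs a full byte per match byte, so a match longer than 9 bytes always wins over literals.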
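The Join branch is only de-nested by this commit, not altered. It merges two back-to-back matches that carry the same short offset, the shape a repeating pattern longer than the matchfinder's length cap appears to take, judging by the pMatch->length >= LCP_MAX test. Below is a minimal sketch of the rule on simplified records; the names are hypothetical, and SKETCH_LCP_MAX / SKETCH_MAX_VARLEN are assumed stand-ins for the compressor's real LCP_MAX and MAX_VARLEN constants.

#define SKETCH_LCP_MAX    255     /* assumed stand-in for LCP_MAX */
#define SKETCH_MAX_VARLEN 0xffff  /* assumed stand-in for MAX_VARLEN */

/* Simplified match record; the real one lives in the compressor state. */
typedef struct {
   int length;
   int offset;
} match_rec;

/* Merge pNext into pCur when both continue one repeating pattern:
 * same small offset (<= 32), pCur covering a whole number of periods
 * of the pattern, and a combined length still encodable as one command. */
static int try_join(match_rec *pCur, match_rec *pNext) {
   if (pCur->length >= SKETCH_LCP_MAX &&
       pCur->offset && pCur->offset <= 32 &&
       pNext->offset == pCur->offset &&
       (pCur->length % pCur->offset) == 0 &&
       (pCur->length + pNext->length) <= SKETCH_MAX_VARLEN) {
      pCur->length += pNext->length;   /* one longer command... */
      pNext->offset = 0;               /* ...and the swallowed record is */
      pNext->length = -1;              /* neutralized, as in the real code */
      return 1;
   }
   return 0;
}

After a successful join the real loop continues from the same position instead of advancing, so consecutive capped matches coalesce one neighbor at a time, up to the MAX_VARLEN cap.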