Mirror of https://github.com/emmanuel-marty/lzsa.git, synced 2025-01-11 12:30:24 +00:00
Reduce token count by 1% without changing the ratio
commit 9e75a55dd3 (parent b7967c3aa1)

src/shrink.c: 58 changed lines
@@ -49,6 +49,8 @@
 #define LAST_MATCH_OFFSET 4
 #define LAST_LITERALS 1
 
+#define MODESWITCH_PENALTY 1
+
 /** One match */
 typedef struct _lzsa_match {
    unsigned short length;
@@ -389,11 +391,11 @@ static void lzsa_find_all_matches(lsza_compressor *pCompressor, const int nStart
 }
 
 /**
- * Get the number of extra bytes required to represent a literals length
+ * Get the number of extra bits required to represent a literals length
  *
  * @param nLength literals length
  *
- * @return number of extra bytes required
+ * @return number of extra bits required
  */
 static inline int lzsa_get_literals_varlen_size(const int nLength) {
    if (nLength < LITERALS_RUN_LEN) {
@@ -401,12 +403,12 @@ static inline int lzsa_get_literals_varlen_size(const int nLength) {
    }
    else {
       if (nLength < 256)
-         return 1;
+         return 8;
       else {
          if (nLength < 512)
-            return 2;
+            return 16;
          else
-            return 3;
+            return 24;
       }
    }
 }
@@ -440,11 +442,11 @@ static inline int lzsa_write_literals_varlen(unsigned char *pOutData, int nOutOf
 }
 
 /**
- * Get the number of extra bytes required to represent an encoded match length
+ * Get the number of extra bits required to represent an encoded match length
  *
  * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE)
  *
- * @return number of extra bytes required
+ * @return number of extra bits required
  */
 static inline int lzsa_get_match_varlen_size(const int nLength) {
    if (nLength < MATCH_RUN_LEN) {
@@ -452,12 +454,12 @@ static inline int lzsa_get_match_varlen_size(const int nLength) {
    }
    else {
       if ((nLength + MIN_MATCH_SIZE) < 256)
-         return 1;
+         return 8;
       else {
          if ((nLength + MIN_MATCH_SIZE) < 512)
-            return 2;
+            return 16;
          else
-            return 3;
+            return 24;
       }
    }
 }
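Both helpers keep the same thresholds and simply report the same extra length bytes as bit counts (8, 16 or 24 instead of 1, 2 or 3). Below is a standalone sketch of that accounting, under the assumption that lengths below the run-length threshold need no extra bytes; the threshold value passed in the demo is an illustrative stand-in for the real LITERALS_RUN_LEN / MATCH_RUN_LEN constants, not a quote from shrink.c.

#include <stdio.h>

/* Illustrative stand-in for lzsa_get_literals_varlen_size() /
 * lzsa_get_match_varlen_size() after the change: same thresholds,
 * but the extra length bytes are reported as bits. nRunLen stands in
 * for LITERALS_RUN_LEN or MATCH_RUN_LEN. */
static int extra_varlen_bits(int nLength, int nRunLen) {
   if (nLength < nRunLen)
      return 0;    /* length fits in the token field: no extra bytes */
   if (nLength < 256)
      return 8;    /* one extra length byte */
   if (nLength < 512)
      return 16;   /* two extra length bytes */
   return 24;      /* three extra length bytes */
}

int main(void) {
   printf("%d %d %d\n",
      extra_varlen_bits(3, 7),     /* 0: short run, encoded in the token */
      extra_varlen_bits(100, 7),   /* 8: one extra byte */
      extra_varlen_bits(300, 7));  /* 16: two extra bytes */
   return 0;
}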
@@ -503,19 +505,21 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
    int nMinMatchSize = pCompressor->min_match_size;
    int i;
 
-   cost[nEndOffset - 1] = 1;
+   cost[nEndOffset - 1] = 8;
    nLastLiteralsOffset = nEndOffset;
 
    for (i = nEndOffset - 2; i != (nStartOffset - 1); i--) {
       int nBestCost, nBestMatchLen, nBestMatchOffset;
 
       int nLiteralsLen = nLastLiteralsOffset - i;
-      nBestCost = 1 + cost[i + 1];
+      nBestCost = 8 + cost[i + 1];
       if (nLiteralsLen == LITERALS_RUN_LEN || nLiteralsLen == 256 || nLiteralsLen == 512) {
          /* Add to the cost of encoding literals as their number crosses a variable length encoding boundary.
           * The cost automatically accumulates down the chain. */
-         nBestCost++;
+         nBestCost += 8;
       }
+      if (pCompressor->match[(i + 1) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
+         nBestCost += MODESWITCH_PENALTY;
       nBestMatchLen = 0;
       nBestMatchOffset = 0;
 
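With the cost[] array now tracking bits instead of bytes, the optimizer gains sub-byte resolution, and the new MODESWITCH_PENALTY term (1 bit) is added whenever the chosen step lands on a position where a match begins. A plausible reading, given the commit message, is that the penalty acts as a tie-breaker that steers the parse toward fewer commands when two encodings cost the same number of bytes; the sketch below illustrates that effect with invented numbers.

#include <stdio.h>

#define MODESWITCH_PENALTY 1   /* 1 bit, far below the 8 bits of a real byte */

int main(void) {
   /* Two hypothetical parses of the same span that produce the same byte count
    * but a different number of commands (the counts are made up for the demo). */
   int nBytesA = 5, nCommandsA = 3;
   int nBytesB = 5, nCommandsB = 2;

   /* Byte-granular costs tie, so a byte-based optimizer has no preference. */
   printf("byte cost: %d vs %d\n", nBytesA, nBytesB);

   /* Bit-granular costs with a tiny per-switch penalty break the tie in favor
    * of the parse with fewer commands, without approaching a full byte. */
   int nBitsA = (nBytesA << 3) + nCommandsA * MODESWITCH_PENALTY;
   int nBitsB = (nBytesB << 3) + nCommandsB * MODESWITCH_PENALTY;
   printf("bit cost: %d vs %d -> pick %s\n", nBitsA, nBitsB, (nBitsB < nBitsA) ? "B" : "A");
   return 0;
}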
@@ -523,7 +527,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
       int m;
 
       for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
-         int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 1 : 2;
+         int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 8 : 16;
 
          if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
             int nCurCost;
@@ -532,8 +536,10 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
             if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
                nMatchLen = nEndOffset - LAST_LITERALS - i;
 
-            nCurCost = 1 + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE);
+            nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(nMatchLen - MIN_MATCH_SIZE);
             nCurCost += cost[i + nMatchLen];
+            if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
+               nCurCost += MODESWITCH_PENALTY;
 
             if (nBestCost >= nCurCost) {
                nBestCost = nCurCost;
@@ -555,8 +561,10 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
            for (k = nMinMatchSize; k < nMatchRunLen; k++) {
               int nCurCost;
 
-              nCurCost = 1 + nMatchOffsetSize /* no extra match len bytes */;
+              nCurCost = 8 + nMatchOffsetSize /* no extra match len bytes */;
               nCurCost += cost[i + k];
+              if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
+                 nCurCost += MODESWITCH_PENALTY;
 
               if (nBestCost >= nCurCost) {
                  nBestCost = nCurCost;
@@ -568,8 +576,10 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
            for (; k <= nMatchLen; k++) {
               int nCurCost;
 
-              nCurCost = 1 + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE);
+              nCurCost = 8 + nMatchOffsetSize + lzsa_get_match_varlen_size(k - MIN_MATCH_SIZE);
               nCurCost += cost[i + k];
+              if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
+                 nCurCost += MODESWITCH_PENALTY;
 
               if (nBestCost >= nCurCost) {
                  nBestCost = nCurCost;
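All three candidate loops above now assemble a match command's cost directly in bits: 8 for the token, 8 or 16 for the short or long offset, the extra match-length bits from lzsa_get_match_varlen_size(), the already-computed cost of the rest of the block, and the 1-bit penalty when the following position also starts a match. A compact restatement of that formula, with hypothetical parameter names and cost[] assumed to already hold bit costs:

/* Hypothetical restatement of the per-candidate cost computed in the loops above. */
int match_candidate_cost_bits(int nMatchOffset, int nExtraMatchLenBits,
                              int nRestCostBits, int nNextStartsMatch) {
   int nOffsetBits = (nMatchOffset <= 256) ? 8 : 16;   /* short vs. long offset */
   int nBits = 8 /* token */ + nOffsetBits + nExtraMatchLenBits + nRestCostBits;
   if (nNextStartsMatch)
      nBits += 1;   /* MODESWITCH_PENALTY: sub-byte nudge toward fewer commands */
   return nBits;
}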
@@ -614,10 +624,10 @@ static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int n
       if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <FF> <ll> <ll> <offset> <offset> <FF> <mm> <mm> */ {
          int nMatchOffset = pMatch->offset;
          int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
-         int nCommandSize = 1 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 1 : 2) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
+         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
 
          if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) {
-            if (nCommandSize >= (nMatchLen + lzsa_get_literals_varlen_size(nNumLiterals + nMatchLen))) {
+            if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nMatchLen))) {
               /* This command is a match; the next command is also a match. The next command currently has no literals; replacing this command by literals will
                * make the next command eat the cost of encoding the current number of literals, + nMatchLen extra literals. The size of the current match command is
                * at least as much as the number of literal bytes + the extra cost of encoding them in the next match command, so we can safely replace the current
@@ -634,7 +644,7 @@ static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int n
               nNextNumLiterals++;
            } while (nCurIndex < nEndOffset && pCompressor->match[nCurIndex << MATCHES_PER_OFFSET_SHIFT].length < MIN_MATCH_SIZE);
 
-            if (nCommandSize >= (nMatchLen + lzsa_get_literals_varlen_size(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size(nNextNumLiterals))) {
+            if (nCommandSize >= ((nMatchLen << 3) + lzsa_get_literals_varlen_size(nNumLiterals + nNextNumLiterals + nMatchLen) - lzsa_get_literals_varlen_size(nNextNumLiterals))) {
               /* This command is a match, and is followed by literals, and then another match or the end of the input data. If encoding this match as literals doesn't take
                * more room than the match, and doesn't grow the next match command's literals encoding, go ahead and remove the command. */
               nReduce = 1;
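The two reduction checks above compare the bit cost of the match command against the bit cost of replacing it with plain literals, which is why nMatchLen is shifted left by 3: each replaced byte becomes eight literal bits, and the varlen helpers already return bits. A small sketch of that comparison; the function and parameter names are hypothetical, and the varlen growth is assumed to be supplied in bits as in the patched shrink.c.

/* Sketch of the unit bookkeeping behind the reduction checks above. */
int match_is_cheaper_as_literals(int nCommandSizeBits, int nMatchLen,
                                 int nLiteralsVarlenGrowthBits) {
   /* Replacing the match emits nMatchLen literal bytes (nMatchLen << 3 bits)
    * plus whatever the enlarged literals run adds to its length field. */
   return nCommandSizeBits >= ((nMatchLen << 3) + nLiteralsVarlenGrowthBits);
}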
@@ -706,9 +716,9 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
         int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
         int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN) ? MATCH_RUN_LEN : nEncodedMatchLen;
         int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
-         int nCommandSize = 1 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + nNumLiterals + (nTokenLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
+         int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
 
-         if ((nOutOffset + nCommandSize) > nMaxOutDataSize)
+         if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
            return -1;
         if (nMatchOffset < MIN_OFFSET || nMatchOffset > MAX_OFFSET)
            return -1;
@@ -741,9 +751,9 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
 
    {
      int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN) ? LITERALS_RUN_LEN : nNumLiterals;
-      int nCommandSize = 1 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + nNumLiterals;
+      int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNumLiterals << 3);
 
-      if ((nOutOffset + nCommandSize) > nMaxOutDataSize)
+      if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
         return -1;
 
      pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
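In the write path, every term of nCommandSize is a whole number of bytes expressed in bits (the token, the literal payload, the one- or two-byte offset, the extra length bytes), and MODESWITCH_PENALTY is never added here, so shifting right by 3 converts the total back to bytes exactly before checking it against nMaxOutDataSize. A minimal self-check along those lines, with assumed inputs and hypothetical helper names:

#include <assert.h>
#include <stdio.h>

/* Assumed restatement of the bit-based command size used in lzsa_write_block(). */
int command_size_bits(int nLiterals, int nLiteralsVarlenBits,
                      int nOffsetBits, int nMatchVarlenBits) {
   return 8 /* token */ + nLiteralsVarlenBits + (nLiterals << 3)
        + nOffsetBits + nMatchVarlenBits;
}

int main(void) {
   int nBits = command_size_bits(10, 8, 16, 8);   /* hypothetical command */
   assert((nBits & 7) == 0);                      /* always a whole number of bytes */
   printf("command size: %d bits = %d bytes\n", nBits, nBits >> 3);
   return 0;
}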