mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-02-16 17:30:44 +00:00
Merge pull request #3 from emmanuel-marty/master
Sync with E.Marty's branch
This commit is contained in:
commit
e3d7ec9c40
37
src/lzsa.c
37
src/lzsa.c
@ -46,6 +46,7 @@
|
|||||||
#define OPT_RAW 2
|
#define OPT_RAW 2
|
||||||
#define OPT_FAVOR_RATIO 4
|
#define OPT_FAVOR_RATIO 4
|
||||||
#define OPT_RAW_BACKWARD 8
|
#define OPT_RAW_BACKWARD 8
|
||||||
|
#define OPT_STATS 16
|
||||||
|
|
||||||
#define TOOL_VERSION "1.1.0"
|
#define TOOL_VERSION "1.1.0"
|
||||||
|
|
||||||
@ -104,6 +105,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
|
|||||||
int nCommandCount = 0, nSafeDist = 0;
|
int nCommandCount = 0, nSafeDist = 0;
|
||||||
int nFlags;
|
int nFlags;
|
||||||
lzsa_status_t nStatus;
|
lzsa_status_t nStatus;
|
||||||
|
lzsa_stats stats;
|
||||||
|
|
||||||
nFlags = 0;
|
nFlags = 0;
|
||||||
if (nOptions & OPT_FAVOR_RATIO)
|
if (nOptions & OPT_FAVOR_RATIO)
|
||||||
@ -117,7 +119,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
|
|||||||
nStartTime = do_get_time();
|
nStartTime = do_get_time();
|
||||||
}
|
}
|
||||||
|
|
||||||
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist);
|
nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist, &stats);
|
||||||
|
|
||||||
if ((nOptions & OPT_VERBOSE)) {
|
if ((nOptions & OPT_VERBOSE)) {
|
||||||
nEndTime = do_get_time();
|
nEndTime = do_get_time();
|
||||||
@ -149,6 +151,32 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nOptions & OPT_STATS) {
|
||||||
|
if (stats.literals_divisor > 0)
|
||||||
|
fprintf(stdout, "Literals: min: %d avg: %d max: %d count: %d\n", stats.min_literals, stats.total_literals / stats.literals_divisor, stats.max_literals, stats.literals_divisor);
|
||||||
|
else
|
||||||
|
fprintf(stdout, "Literals: none\n");
|
||||||
|
if (stats.match_divisor > 0) {
|
||||||
|
fprintf(stdout, "Offsets: min: %d avg: %d max: %d reps: %d count: %d\n", stats.min_offset, stats.total_offsets / stats.match_divisor, stats.max_offset, stats.num_rep_offsets, stats.match_divisor);
|
||||||
|
fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf(stdout, "Offsets: none\n");
|
||||||
|
fprintf(stdout, "Match lens: none\n");
|
||||||
|
}
|
||||||
|
if (stats.rle1_divisor > 0) {
|
||||||
|
fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf(stdout, "RLE1 lens: none\n");
|
||||||
|
}
|
||||||
|
if (stats.rle2_divisor > 0) {
|
||||||
|
fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
fprintf(stdout, "RLE2 lens: none\n");
|
||||||
|
}
|
||||||
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -1009,6 +1037,13 @@ int main(int argc, char **argv) {
|
|||||||
else
|
else
|
||||||
bArgsError = true;
|
bArgsError = true;
|
||||||
}
|
}
|
||||||
|
else if (!strcmp(argv[i], "-stats")) {
|
||||||
|
if ((nOptions & OPT_STATS) == 0) {
|
||||||
|
nOptions |= OPT_STATS;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
bArgsError = true;
|
||||||
|
}
|
||||||
else {
|
else {
|
||||||
if (!pszInFilename)
|
if (!pszInFilename)
|
||||||
pszInFilename = argv[i];
|
pszInFilename = argv[i];
|
||||||
|
@ -98,7 +98,9 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
|
|||||||
nLen = 0;
|
nLen = 0;
|
||||||
if (nLen > LCP_MAX)
|
if (nLen > LCP_MAX)
|
||||||
nLen = LCP_MAX;
|
nLen = LCP_MAX;
|
||||||
int nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
|
int nTaggedLen = 0;
|
||||||
|
if (nLen)
|
||||||
|
nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1));
|
||||||
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
|
intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -380,6 +380,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
|
pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen;
|
||||||
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
||||||
|
|
||||||
|
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
|
||||||
|
pCompressor->stats.min_literals = nNumLiterals;
|
||||||
|
if (nNumLiterals > pCompressor->stats.max_literals)
|
||||||
|
pCompressor->stats.max_literals = nNumLiterals;
|
||||||
|
pCompressor->stats.total_literals += nNumLiterals;
|
||||||
|
pCompressor->stats.literals_divisor++;
|
||||||
|
|
||||||
if (nNumLiterals != 0) {
|
if (nNumLiterals != 0) {
|
||||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||||
nOutOffset += nNumLiterals;
|
nOutOffset += nNumLiterals;
|
||||||
@ -391,6 +398,37 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
||||||
}
|
}
|
||||||
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
|
nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen);
|
||||||
|
|
||||||
|
if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
|
||||||
|
pCompressor->stats.min_offset = nMatchOffset;
|
||||||
|
if (nMatchOffset > pCompressor->stats.max_offset)
|
||||||
|
pCompressor->stats.max_offset = nMatchOffset;
|
||||||
|
pCompressor->stats.total_offsets += nMatchOffset;
|
||||||
|
|
||||||
|
if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
|
||||||
|
pCompressor->stats.min_match_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_match_len)
|
||||||
|
pCompressor->stats.max_match_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_match_lens += nMatchLen;
|
||||||
|
pCompressor->stats.match_divisor++;
|
||||||
|
|
||||||
|
if (nMatchOffset == 1) {
|
||||||
|
if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
|
||||||
|
pCompressor->stats.min_rle1_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_rle1_len)
|
||||||
|
pCompressor->stats.max_rle1_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_rle1_lens += nMatchLen;
|
||||||
|
pCompressor->stats.rle1_divisor++;
|
||||||
|
}
|
||||||
|
else if (nMatchOffset == 2) {
|
||||||
|
if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
|
||||||
|
pCompressor->stats.min_rle2_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_rle2_len)
|
||||||
|
pCompressor->stats.max_rle2_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_rle2_lens += nMatchLen;
|
||||||
|
pCompressor->stats.rle2_divisor++;
|
||||||
|
}
|
||||||
|
|
||||||
i += nMatchLen;
|
i += nMatchLen;
|
||||||
|
|
||||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||||
@ -422,6 +460,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char
|
|||||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
|
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
|
||||||
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);
|
||||||
|
|
||||||
|
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
|
||||||
|
pCompressor->stats.min_literals = nNumLiterals;
|
||||||
|
if (nNumLiterals > pCompressor->stats.max_literals)
|
||||||
|
pCompressor->stats.max_literals = nNumLiterals;
|
||||||
|
pCompressor->stats.total_literals += nNumLiterals;
|
||||||
|
pCompressor->stats.literals_divisor++;
|
||||||
|
|
||||||
if (nNumLiterals != 0) {
|
if (nNumLiterals != 0) {
|
||||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||||
nOutOffset += nNumLiterals;
|
nOutOffset += nNumLiterals;
|
||||||
|
@ -192,13 +192,17 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
|
|
||||||
memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));
|
memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT));
|
||||||
|
|
||||||
|
for (i = (nStartOffset << MATCHES_PER_OFFSET_SHIFT); i != (nEndOffset << MATCHES_PER_OFFSET_SHIFT); i++) {
|
||||||
|
arrival[i].cost = 0x40000000;
|
||||||
|
}
|
||||||
|
|
||||||
arrival[nStartOffset << MATCHES_PER_OFFSET_SHIFT].from_slot = -1;
|
arrival[nStartOffset << MATCHES_PER_OFFSET_SHIFT].from_slot = -1;
|
||||||
|
|
||||||
for (i = nStartOffset; i != (nEndOffset - 1); i++) {
|
for (i = nStartOffset; i != (nEndOffset - 1); i++) {
|
||||||
int m, nMatches;
|
int m, nMatches;
|
||||||
|
|
||||||
for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
|
for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
|
||||||
int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
|
const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost & 0x3fffffff;
|
||||||
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
|
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
|
||||||
int nNumLiterals = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals + 1;
|
int nNumLiterals = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals + 1;
|
||||||
|
|
||||||
@ -215,34 +219,39 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
if (!nFavorRatio && nNumLiterals == 1)
|
if (!nFavorRatio && nNumLiterals == 1)
|
||||||
nCodingChoiceCost += MODESWITCH_PENALTY;
|
nCodingChoiceCost += MODESWITCH_PENALTY;
|
||||||
|
|
||||||
int exists = 0;
|
lzsa_arrival *pDestSlots = &arrival[(i + 1) << MATCHES_PER_OFFSET_SHIFT];
|
||||||
for (n = 0;
|
if (nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) {
|
||||||
n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost;
|
int exists = 0;
|
||||||
n++) {
|
for (n = 0;
|
||||||
if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) {
|
n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nCodingChoiceCost;
|
||||||
exists = 1;
|
n++) {
|
||||||
}
|
if (pDestSlots[n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) {
|
||||||
}
|
exists = 1;
|
||||||
|
break;
|
||||||
for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
|
|
||||||
lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n];
|
|
||||||
if (pDestArrival->from_slot == 0 ||
|
|
||||||
nCodingChoiceCost <= pDestArrival->cost) {
|
|
||||||
|
|
||||||
if (pDestArrival->from_slot) {
|
|
||||||
memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n + 1],
|
|
||||||
&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n],
|
|
||||||
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pDestArrival->cost = nCodingChoiceCost;
|
if (!exists) {
|
||||||
pDestArrival->from_pos = i;
|
for (n = 0; n < NMATCHES_PER_OFFSET; n++) {
|
||||||
pDestArrival->from_slot = j + 1;
|
lzsa_arrival *pDestArrival = &pDestSlots[n];
|
||||||
pDestArrival->match_offset = 0;
|
if (nCodingChoiceCost <= pDestArrival->cost) {
|
||||||
pDestArrival->match_len = 0;
|
|
||||||
pDestArrival->num_literals = nNumLiterals;
|
if (pDestArrival->from_slot) {
|
||||||
pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
|
memmove(&pDestSlots[n + 1],
|
||||||
break;
|
&pDestSlots[n],
|
||||||
|
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
pDestArrival->cost = nCodingChoiceCost;
|
||||||
|
pDestArrival->from_pos = i;
|
||||||
|
pDestArrival->from_slot = j + 1;
|
||||||
|
pDestArrival->match_offset = 0;
|
||||||
|
pDestArrival->match_len = 0;
|
||||||
|
pDestArrival->num_literals = nNumLiterals;
|
||||||
|
pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -254,109 +263,124 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
int nMatchOffset = match[m].offset;
|
int nMatchOffset = match[m].offset;
|
||||||
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
|
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
|
||||||
int nStartingMatchLen, k;
|
int nStartingMatchLen, k;
|
||||||
|
int nMaxRepLen[NMATCHES_PER_OFFSET];
|
||||||
|
|
||||||
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS))
|
||||||
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
nMatchLen = nEndOffset - LAST_LITERALS - i;
|
||||||
|
|
||||||
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
|
|
||||||
nStartingMatchLen = nMatchLen;
|
|
||||||
else
|
|
||||||
nStartingMatchLen = nMinMatchSize;
|
|
||||||
for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
|
for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
|
||||||
const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost;
|
|
||||||
int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
|
int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
|
||||||
int nMaxRepLen = 0;
|
int nCurMaxRepLen = 0;
|
||||||
|
|
||||||
if (nMatchOffset != nRepOffset &&
|
if (nMatchOffset != nRepOffset &&
|
||||||
nRepOffset &&
|
nRepOffset &&
|
||||||
i >= nRepOffset &&
|
i >= nRepOffset &&
|
||||||
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
|
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
|
||||||
while (nMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nMaxRepLen] == pInWindow[i - nMatchOffset + nMaxRepLen])
|
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i - nMatchOffset + nCurMaxRepLen])
|
||||||
nMaxRepLen++;
|
nCurMaxRepLen++;
|
||||||
}
|
}
|
||||||
|
|
||||||
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
nMaxRepLen[j] = nCurMaxRepLen;
|
||||||
int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
|
}
|
||||||
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT];
|
while (j < NMATCHES_PER_OFFSET)
|
||||||
|
nMaxRepLen[j++] = 0;
|
||||||
|
|
||||||
|
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
|
||||||
|
nStartingMatchLen = nMatchLen;
|
||||||
|
else
|
||||||
|
nStartingMatchLen = nMinMatchSize;
|
||||||
|
|
||||||
|
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
||||||
|
int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
|
||||||
|
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT];
|
||||||
|
|
||||||
|
for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) {
|
||||||
|
const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost & 0x3fffffff;
|
||||||
|
int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset;
|
||||||
|
|
||||||
int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost;
|
int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost;
|
||||||
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
|
int nRepCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
|
||||||
int exists = 0;
|
int nCodingChoiceCost = nRepCodingChoiceCost + nMatchOffsetCost;
|
||||||
|
|
||||||
if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
|
if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals)
|
||||||
nCodingChoiceCost += MODESWITCH_PENALTY;
|
nCodingChoiceCost += MODESWITCH_PENALTY;
|
||||||
|
|
||||||
for (n = 0;
|
if (nRepCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) {
|
||||||
n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
|
if (nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) {
|
||||||
n++) {
|
int exists = 0;
|
||||||
if (pDestSlots[n].rep_offset == nMatchOffset) {
|
|
||||||
exists = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
|
for (n = 0;
|
||||||
lzsa_arrival *pDestArrival = &pDestSlots[n];
|
n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nCodingChoiceCost;
|
||||||
|
n++) {
|
||||||
if (pDestArrival->from_slot == 0 ||
|
if (pDestSlots[n].rep_offset == nMatchOffset) {
|
||||||
nCodingChoiceCost <= pDestArrival->cost) {
|
exists = 1;
|
||||||
|
break;
|
||||||
if (pDestArrival->from_slot) {
|
|
||||||
memmove(&pDestSlots[n + 1],
|
|
||||||
&pDestSlots[n],
|
|
||||||
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
pDestArrival->cost = nCodingChoiceCost;
|
|
||||||
pDestArrival->from_pos = i;
|
|
||||||
pDestArrival->from_slot = j + 1;
|
|
||||||
pDestArrival->match_offset = nMatchOffset;
|
|
||||||
pDestArrival->match_len = k;
|
|
||||||
pDestArrival->num_literals = 0;
|
|
||||||
pDestArrival->rep_offset = nMatchOffset;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* If this coding choice doesn't rep-match, see if we still get a match by using the current repmatch offset for this arrival. This can occur (and not have the
|
|
||||||
* matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions
|
|
||||||
* of identical bytes, for instance. Checking for this provides a big compression win on some files. */
|
|
||||||
|
|
||||||
if (i >= nRepOffset && nMaxRepLen >= k) {
|
|
||||||
/* A match is possible at the rep offset; insert the extra coding choice. */
|
|
||||||
|
|
||||||
nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + /* rep match - no offset cost */ nMatchLenCost;
|
|
||||||
exists = 0;
|
|
||||||
|
|
||||||
for (n = 0;
|
|
||||||
n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost;
|
|
||||||
n++) {
|
|
||||||
if (pDestSlots[n].rep_offset == nRepOffset) {
|
|
||||||
exists = 1;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) {
|
|
||||||
lzsa_arrival *pDestArrival = &pDestSlots[n];
|
|
||||||
|
|
||||||
if (pDestArrival->from_slot == 0 ||
|
|
||||||
nCodingChoiceCost <= pDestArrival->cost) {
|
|
||||||
|
|
||||||
if (pDestArrival->from_slot) {
|
|
||||||
memmove(&pDestSlots[n + 1],
|
|
||||||
&pDestSlots[n],
|
|
||||||
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pDestArrival->cost = nCodingChoiceCost;
|
if (!exists) {
|
||||||
pDestArrival->from_pos = i;
|
for (n = 0; n < NMATCHES_PER_OFFSET; n++) {
|
||||||
pDestArrival->from_slot = j + 1;
|
lzsa_arrival *pDestArrival = &pDestSlots[n];
|
||||||
pDestArrival->match_offset = nRepOffset;
|
|
||||||
pDestArrival->match_len = k;
|
if (nCodingChoiceCost <= pDestArrival->cost) {
|
||||||
pDestArrival->num_literals = 0;
|
if (pDestArrival->from_slot) {
|
||||||
pDestArrival->rep_offset = nRepOffset;
|
memmove(&pDestSlots[n + 1],
|
||||||
break;
|
&pDestSlots[n],
|
||||||
|
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
pDestArrival->cost = nCodingChoiceCost;
|
||||||
|
pDestArrival->from_pos = i;
|
||||||
|
pDestArrival->from_slot = j + 1;
|
||||||
|
pDestArrival->match_offset = nMatchOffset;
|
||||||
|
pDestArrival->match_len = k;
|
||||||
|
pDestArrival->num_literals = 0;
|
||||||
|
pDestArrival->rep_offset = nMatchOffset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* If this coding choice doesn't rep-match, see if we still get a match by using the current repmatch offset for this arrival. This can occur (and not have the
|
||||||
|
* matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions
|
||||||
|
* of identical bytes, for instance. Checking for this provides a big compression win on some files. */
|
||||||
|
|
||||||
|
if (nMaxRepLen[j] >= k) {
|
||||||
|
int exists = 0;
|
||||||
|
|
||||||
|
/* A match is possible at the rep offset; insert the extra coding choice. */
|
||||||
|
|
||||||
|
for (n = 0;
|
||||||
|
n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nRepCodingChoiceCost;
|
||||||
|
n++) {
|
||||||
|
if (pDestSlots[n].rep_offset == nRepOffset) {
|
||||||
|
exists = 1;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!exists) {
|
||||||
|
for (n = 0; n < NMATCHES_PER_OFFSET; n++) {
|
||||||
|
lzsa_arrival *pDestArrival = &pDestSlots[n];
|
||||||
|
|
||||||
|
if (nRepCodingChoiceCost <= pDestArrival->cost) {
|
||||||
|
if (pDestArrival->from_slot) {
|
||||||
|
memmove(&pDestSlots[n + 1],
|
||||||
|
&pDestSlots[n],
|
||||||
|
sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1));
|
||||||
|
}
|
||||||
|
|
||||||
|
pDestArrival->cost = nRepCodingChoiceCost;
|
||||||
|
pDestArrival->from_pos = i;
|
||||||
|
pDestArrival->from_slot = j + 1;
|
||||||
|
pDestArrival->match_offset = nRepOffset;
|
||||||
|
pDestArrival->match_len = k;
|
||||||
|
pDestArrival->num_literals = 0;
|
||||||
|
pDestArrival->rep_offset = nRepOffset;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -391,7 +415,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
|
static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
|
||||||
int i;
|
int i;
|
||||||
int nNumLiterals = 0;
|
int nNumLiterals = 0;
|
||||||
|
int nPrevRepMatchOffset = 0;
|
||||||
int nRepMatchOffset = 0;
|
int nRepMatchOffset = 0;
|
||||||
|
int nRepMatchLen = 0;
|
||||||
|
int nRepIndex = 0;
|
||||||
int nDidReduce = 0;
|
int nDidReduce = 0;
|
||||||
|
|
||||||
for (i = nStartOffset; i < nEndOffset; ) {
|
for (i = nStartOffset; i < nEndOffset; ) {
|
||||||
@ -411,15 +438,17 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match */
|
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match */
|
||||||
|
|
||||||
if (nRepMatchOffset && pMatch->offset != nRepMatchOffset && (pBestMatch[nNextIndex].offset != pMatch->offset || pBestMatch[nNextIndex].offset == nRepMatchOffset ||
|
if (nRepMatchOffset && pMatch->offset != nRepMatchOffset && (pBestMatch[nNextIndex].offset != pMatch->offset || pBestMatch[nNextIndex].offset == nRepMatchOffset ||
|
||||||
((pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16))) >=
|
((pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16))) >
|
||||||
((pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16))))) {
|
((pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16))))) {
|
||||||
/* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
|
/* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
|
||||||
* matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the
|
* matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the
|
||||||
* ratio (the forward arrivals parser already has this covered). */
|
* ratio (the forward arrivals parser already has this covered). */
|
||||||
if (i >= nRepMatchOffset &&
|
if (i >= nRepMatchOffset &&
|
||||||
(i - nRepMatchOffset + pMatch->length) <= (nEndOffset - LAST_LITERALS) &&
|
(i - nRepMatchOffset + pMatch->length) <= (nEndOffset - LAST_LITERALS) &&
|
||||||
!memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length))
|
!memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length)) {
|
||||||
pMatch->offset = nRepMatchOffset;
|
pMatch->offset = nRepMatchOffset;
|
||||||
|
nDidReduce = 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
|
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
|
||||||
@ -431,6 +460,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
if (nMaxLen >= pMatch->length) {
|
if (nMaxLen >= pMatch->length) {
|
||||||
/* Replace */
|
/* Replace */
|
||||||
pMatch->offset = pBestMatch[nNextIndex].offset;
|
pMatch->offset = pBestMatch[nNextIndex].offset;
|
||||||
|
nDidReduce = 1;
|
||||||
}
|
}
|
||||||
else if (nMaxLen >= 2 && pMatch->offset != nRepMatchOffset) {
|
else if (nMaxLen >= 2 && pMatch->offset != nRepMatchOffset) {
|
||||||
int nPartialSizeBefore, nPartialSizeAfter;
|
int nPartialSizeBefore, nPartialSizeAfter;
|
||||||
@ -452,6 +482,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
pBestMatch[i + j].length = 0;
|
pBestMatch[i + j].length = 0;
|
||||||
}
|
}
|
||||||
pMatch->length = nMaxLen;
|
pMatch->length = nMaxLen;
|
||||||
|
nDidReduce = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -476,6 +507,15 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
if (pBestMatch[nNextIndex].offset != nRepMatchOffset)
|
if (pBestMatch[nNextIndex].offset != nRepMatchOffset)
|
||||||
nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
|
nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
|
||||||
|
|
||||||
|
int nReplaceRepOffset = 0;
|
||||||
|
if (nRepMatchOffset && nRepMatchOffset != nPrevRepMatchOffset && nRepMatchLen >= MIN_MATCH_SIZE_V2 && nRepMatchOffset != pBestMatch[nNextIndex].offset && nRepIndex >= pBestMatch[nNextIndex].offset &&
|
||||||
|
(nRepIndex - pBestMatch[nNextIndex].offset + nRepMatchLen) <= (nEndOffset - LAST_LITERALS) &&
|
||||||
|
!memcmp(pInWindow + nRepIndex - nRepMatchOffset, pInWindow + nRepIndex - pBestMatch[nNextIndex].offset, nRepMatchLen)) {
|
||||||
|
/* Replacing this match command by literals would let us create a repmatch */
|
||||||
|
nReplaceRepOffset = 1;
|
||||||
|
nReducedCommandSize -= (nRepMatchOffset <= 32) ? 4 : ((nRepMatchOffset <= 512) ? 8 : ((nRepMatchOffset <= (8192 + 512)) ? 12 : 16));
|
||||||
|
}
|
||||||
|
|
||||||
if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
|
if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
|
||||||
/* Reduce */
|
/* Reduce */
|
||||||
int nMatchLen = pMatch->length;
|
int nMatchLen = pMatch->length;
|
||||||
@ -486,6 +526,11 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
}
|
}
|
||||||
|
|
||||||
nDidReduce = 1;
|
nDidReduce = 1;
|
||||||
|
|
||||||
|
if (nReplaceRepOffset) {
|
||||||
|
pBestMatch[nRepIndex].offset = pBestMatch[nNextIndex].offset;
|
||||||
|
nRepMatchOffset = pBestMatch[nNextIndex].offset;
|
||||||
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -502,10 +547,14 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
|||||||
pMatch->length += pBestMatch[i + nMatchLen].length;
|
pMatch->length += pBestMatch[i + nMatchLen].length;
|
||||||
pBestMatch[i + nMatchLen].offset = 0;
|
pBestMatch[i + nMatchLen].offset = 0;
|
||||||
pBestMatch[i + nMatchLen].length = -1;
|
pBestMatch[i + nMatchLen].length = -1;
|
||||||
|
nDidReduce = 1;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
nPrevRepMatchOffset = nRepMatchOffset;
|
||||||
nRepMatchOffset = pMatch->offset;
|
nRepMatchOffset = pMatch->offset;
|
||||||
|
nRepMatchLen = pMatch->length;
|
||||||
|
nRepIndex = i;
|
||||||
|
|
||||||
i += pMatch->length;
|
i += pMatch->length;
|
||||||
nNumLiterals = 0;
|
nNumLiterals = 0;
|
||||||
@ -586,6 +635,13 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa
|
|||||||
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
||||||
if (nOutOffset < 0) return -1;
|
if (nOutOffset < 0) return -1;
|
||||||
|
|
||||||
|
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
|
||||||
|
pCompressor->stats.min_literals = nNumLiterals;
|
||||||
|
if (nNumLiterals > pCompressor->stats.max_literals)
|
||||||
|
pCompressor->stats.max_literals = nNumLiterals;
|
||||||
|
pCompressor->stats.total_literals += nNumLiterals;
|
||||||
|
pCompressor->stats.literals_divisor++;
|
||||||
|
|
||||||
if (nNumLiterals != 0) {
|
if (nNumLiterals != 0) {
|
||||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||||
nOutOffset += nNumLiterals;
|
nOutOffset += nNumLiterals;
|
||||||
@ -608,11 +664,45 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa
|
|||||||
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
pOutData[nOutOffset++] = (-nMatchOffset) >> 8;
|
||||||
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
pOutData[nOutOffset++] = (-nMatchOffset) & 0xff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nMatchOffset == nRepMatchOffset)
|
||||||
|
pCompressor->stats.num_rep_offsets++;
|
||||||
|
|
||||||
nRepMatchOffset = nMatchOffset;
|
nRepMatchOffset = nMatchOffset;
|
||||||
|
|
||||||
nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
|
nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen);
|
||||||
if (nOutOffset < 0) return -1;
|
if (nOutOffset < 0) return -1;
|
||||||
|
|
||||||
|
if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1)
|
||||||
|
pCompressor->stats.min_offset = nMatchOffset;
|
||||||
|
if (nMatchOffset > pCompressor->stats.max_offset)
|
||||||
|
pCompressor->stats.max_offset = nMatchOffset;
|
||||||
|
pCompressor->stats.total_offsets += nMatchOffset;
|
||||||
|
|
||||||
|
if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1)
|
||||||
|
pCompressor->stats.min_match_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_match_len)
|
||||||
|
pCompressor->stats.max_match_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_match_lens += nMatchLen;
|
||||||
|
pCompressor->stats.match_divisor++;
|
||||||
|
|
||||||
|
if (nMatchOffset == 1) {
|
||||||
|
if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1)
|
||||||
|
pCompressor->stats.min_rle1_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_rle1_len)
|
||||||
|
pCompressor->stats.max_rle1_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_rle1_lens += nMatchLen;
|
||||||
|
pCompressor->stats.rle1_divisor++;
|
||||||
|
}
|
||||||
|
else if (nMatchOffset == 2) {
|
||||||
|
if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1)
|
||||||
|
pCompressor->stats.min_rle2_len = nMatchLen;
|
||||||
|
if (nMatchLen > pCompressor->stats.max_rle2_len)
|
||||||
|
pCompressor->stats.max_rle2_len = nMatchLen;
|
||||||
|
pCompressor->stats.total_rle2_lens += nMatchLen;
|
||||||
|
pCompressor->stats.rle2_divisor++;
|
||||||
|
}
|
||||||
|
|
||||||
i += nMatchLen;
|
i += nMatchLen;
|
||||||
|
|
||||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||||
@ -645,6 +735,13 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa
|
|||||||
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals);
|
||||||
if (nOutOffset < 0) return -1;
|
if (nOutOffset < 0) return -1;
|
||||||
|
|
||||||
|
if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1)
|
||||||
|
pCompressor->stats.min_literals = nNumLiterals;
|
||||||
|
if (nNumLiterals > pCompressor->stats.max_literals)
|
||||||
|
pCompressor->stats.max_literals = nNumLiterals;
|
||||||
|
pCompressor->stats.total_literals += nNumLiterals;
|
||||||
|
pCompressor->stats.literals_divisor++;
|
||||||
|
|
||||||
if (nNumLiterals != 0) {
|
if (nNumLiterals != 0) {
|
||||||
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals);
|
||||||
nOutOffset += nNumLiterals;
|
nOutOffset += nNumLiterals;
|
||||||
|
@ -69,6 +69,11 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
|
|||||||
pCompressor->flags = nFlags;
|
pCompressor->flags = nFlags;
|
||||||
pCompressor->safe_dist = 0;
|
pCompressor->safe_dist = 0;
|
||||||
pCompressor->num_commands = 0;
|
pCompressor->num_commands = 0;
|
||||||
|
|
||||||
|
memset(&pCompressor->stats, 0, sizeof(pCompressor->stats));
|
||||||
|
pCompressor->stats.min_literals = -1;
|
||||||
|
pCompressor->stats.min_match_len = -1;
|
||||||
|
pCompressor->stats.min_offset = -1;
|
||||||
|
|
||||||
if (!nResult) {
|
if (!nResult) {
|
||||||
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||||
@ -88,9 +93,6 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
|
|||||||
if (pCompressor->best_match) {
|
if (pCompressor->best_match) {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
else {
|
|
||||||
return 0;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -79,6 +79,35 @@ typedef struct {
|
|||||||
unsigned short match_len;
|
unsigned short match_len;
|
||||||
} lzsa_arrival;
|
} lzsa_arrival;
|
||||||
|
|
||||||
|
/**
 * Compression statistics.
 *
 * Zeroed with memset() in lzsa_compressor_init() and then updated by the block
 * writer (lzsa_write_block_v2 in the code visible here) each time a literal run
 * or a match is emitted. The min_* fields are compared against -1 as a
 * "nothing recorded yet" sentinel. lzsa_compress_stream() copies the whole
 * structure to the caller when a non-NULL pStats pointer is supplied.
 */
|
||||||
|
typedef struct _lzsa_stats {
|
||||||
|
int min_literals; /**< Smallest literal run length (nNumLiterals) recorded; reset to -1 by lzsa_compressor_init() */
|
||||||
|
int max_literals; /**< Largest literal run length recorded */
|
||||||
|
int total_literals; /**< Sum of all recorded literal run lengths */
|
||||||
|
|
||||||
|
int min_offset; /**< Smallest match offset recorded; reset to -1 by lzsa_compressor_init() */
|
||||||
|
int max_offset; /**< Largest match offset recorded */
|
||||||
|
int num_rep_offsets; /**< Number of matches whose offset equals the previous match's offset (nRepMatchOffset) */
|
||||||
|
int total_offsets; /**< Sum of all recorded match offsets */
|
||||||
|
|
||||||
|
int min_match_len; /**< Smallest match length (nMatchLen) recorded; reset to -1 by lzsa_compressor_init() */
|
||||||
|
int max_match_len; /**< Largest match length recorded */
|
||||||
|
int total_match_lens; /**< Sum of all recorded match lengths */
|
||||||
|
|
||||||
|
int min_rle1_len; /**< Smallest length among matches with offset 1. NOTE(review): the visible init code only resets min_literals, min_match_len and min_offset to -1, so this starts at 0 after memset() and the "== -1" test in the writer would never fire -- confirm */
|
||||||
|
int max_rle1_len; /**< Largest length among matches with offset 1 */
|
||||||
|
int total_rle1_lens; /**< Sum of the lengths of matches with offset 1 */
|
||||||
|
|
||||||
|
int min_rle2_len; /**< Smallest length among matches with offset 2. NOTE(review): same initialization concern as min_rle1_len -- confirm */
|
||||||
|
int max_rle2_len; /**< Largest length among matches with offset 2 */
|
||||||
|
int total_rle2_lens; /**< Sum of the lengths of matches with offset 2 */
|
||||||
|
|
||||||
|
int literals_divisor; /**< Number of literal runs recorded (zero-length runs included); presumably the denominator for the literals average -- confirm against the caller */
|
||||||
|
int match_divisor; /**< Number of matches recorded; incremented together with the offset and match length totals */
|
||||||
|
int rle1_divisor; /**< Number of matches with offset 1 recorded */
|
||||||
|
int rle2_divisor; /**< Number of matches with offset 2 recorded */
|
||||||
|
} lzsa_stats;
|
||||||
|
|
||||||
/** Compression context */
|
/** Compression context */
|
||||||
typedef struct _lzsa_compressor {
|
typedef struct _lzsa_compressor {
|
||||||
divsufsort_ctx_t divsufsort_context;
|
divsufsort_ctx_t divsufsort_context;
|
||||||
@ -93,6 +122,7 @@ typedef struct _lzsa_compressor {
|
|||||||
int safe_dist;
|
int safe_dist;
|
||||||
int num_commands;
|
int num_commands;
|
||||||
lzsa_hashmap_t cost_map;
|
lzsa_hashmap_t cost_map;
|
||||||
|
lzsa_stats stats;
|
||||||
} lzsa_compressor;
|
} lzsa_compressor;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -71,11 +71,12 @@ static void lzsa_delete_file(const char *pszInFilename) {
|
|||||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||||
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
||||||
|
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist) {
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
|
||||||
lzsa_stream_t inStream, outStream;
|
lzsa_stream_t inStream, outStream;
|
||||||
void *pDictionaryData = NULL;
|
void *pDictionaryData = NULL;
|
||||||
int nDictionaryDataSize = 0;
|
int nDictionaryDataSize = 0;
|
||||||
@ -99,7 +100,7 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
|
|||||||
return nStatus;
|
return nStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist);
|
nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats);
|
||||||
|
|
||||||
lzsa_dictionary_free(&pDictionaryData);
|
lzsa_dictionary_free(&pDictionaryData);
|
||||||
outStream.close(&outStream);
|
outStream.close(&outStream);
|
||||||
@ -129,12 +130,13 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
|
|||||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||||
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
||||||
|
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
||||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist) {
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) {
|
||||||
unsigned char *pInData, *pOutData;
|
unsigned char *pInData, *pOutData;
|
||||||
lzsa_compressor compressor;
|
lzsa_compressor compressor;
|
||||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||||
@ -289,6 +291,10 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut
|
|||||||
|
|
||||||
int nCommandCount = lzsa_compressor_get_command_count(&compressor);
|
int nCommandCount = lzsa_compressor_get_command_count(&compressor);
|
||||||
int nSafeDist = compressor.safe_dist;
|
int nSafeDist = compressor.safe_dist;
|
||||||
|
|
||||||
|
if (pStats)
|
||||||
|
*pStats = compressor.stats;
|
||||||
|
|
||||||
lzsa_compressor_destroy(&compressor);
|
lzsa_compressor_destroy(&compressor);
|
||||||
|
|
||||||
free(pOutData);
|
free(pOutData);
|
||||||
|
@ -41,6 +41,7 @@ extern "C" {
|
|||||||
|
|
||||||
/* Forward declaration */
|
/* Forward declaration */
|
||||||
typedef enum _lzsa_status_t lzsa_status_t;
|
typedef enum _lzsa_status_t lzsa_status_t;
|
||||||
|
typedef struct _lzsa_stats lzsa_stats;
|
||||||
|
|
||||||
/*-------------- File API -------------- */
|
/*-------------- File API -------------- */
|
||||||
|
|
||||||
@ -58,12 +59,13 @@ typedef enum _lzsa_status_t lzsa_status_t;
|
|||||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||||
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
||||||
|
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
|
lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename,
|
||||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist);
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
|
||||||
|
|
||||||
/*-------------- Streaming API -------------- */
|
/*-------------- Streaming API -------------- */
|
||||||
|
|
||||||
@ -82,12 +84,13 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi
|
|||||||
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
* @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful
|
||||||
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
* @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful
|
||||||
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
* @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful
|
||||||
|
* @param pStats pointer to compression stats that are filled if this function is successful, or NULL
|
||||||
*
|
*
|
||||||
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
* @return LZSA_OK for success, or an error value from lzsa_status_t
|
||||||
*/
|
*/
|
||||||
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize,
|
||||||
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion,
|
||||||
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist);
|
void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user