From 29c6f3b2a3124a0721856500b56d2fd68f9f3926 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Thu, 26 Sep 2019 19:13:09 +0200 Subject: [PATCH 01/10] Remove erroneous else statement --- src/shrink_context.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/shrink_context.c b/src/shrink_context.c index 61f3d66..0acc7db 100644 --- a/src/shrink_context.c +++ b/src/shrink_context.c @@ -88,9 +88,6 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, if (pCompressor->best_match) { return 0; } - else { - return 0; - } } } } From 33327201f7d221ca5d9fb079b0781415be6c9b55 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Thu, 3 Oct 2019 16:58:34 +0200 Subject: [PATCH 02/10] Fix small LZSA2 token reduction inefficiency --- src/shrink_block_v2.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index 13e721d..efad653 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -416,10 +416,12 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un /* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when * matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the * ratio (the forward arrivals parser already has this covered). */ - if (i >= nRepMatchOffset && - (i - nRepMatchOffset + pMatch->length) <= (nEndOffset - LAST_LITERALS) && - !memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length)) + if (i >= nRepMatchOffset && + (i - nRepMatchOffset + pMatch->length) <= (nEndOffset - LAST_LITERALS) && + !memcmp(pInWindow + i - nRepMatchOffset, pInWindow + i - pMatch->offset, pMatch->length)) { pMatch->offset = nRepMatchOffset; + nDidReduce = 1; + } } if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) { @@ -431,6 +433,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un if (nMaxLen >= pMatch->length) { /* Replace */ pMatch->offset = pBestMatch[nNextIndex].offset; + nDidReduce = 1; } else if (nMaxLen >= 2 && pMatch->offset != nRepMatchOffset) { int nPartialSizeBefore, nPartialSizeAfter; @@ -452,6 +455,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un pBestMatch[i + j].length = 0; } pMatch->length = nMaxLen; + nDidReduce = 1; } } } @@ -502,6 +506,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un pMatch->length += pBestMatch[i + nMatchLen].length; pBestMatch[i + nMatchLen].offset = 0; pBestMatch[i + nMatchLen].length = -1; + nDidReduce = 1; continue; } From 39e2a90f8146e2b1ac1b5a787bc8679a7a3f105f Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Fri, 4 Oct 2019 11:54:54 +0200 Subject: [PATCH 03/10] Prevent small matchfinder inefficiency --- src/matchfinder.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/matchfinder.c b/src/matchfinder.c index f24f944..6862143 100644 --- a/src/matchfinder.c +++ b/src/matchfinder.c @@ -98,7 +98,9 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p nLen = 0; if (nLen > LCP_MAX) nLen = LCP_MAX; - int nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1)); + int nTaggedLen = 0; + if (nLen) + nTaggedLen = (nLen << TAG_BITS) | (lzsa_get_index_tag((unsigned int)nIndex) & ((1 << TAG_BITS) - 1)); intervals[i] = ((unsigned int)nIndex) | (((unsigned int)nTaggedLen) << LCP_SHIFT); } } From bfb383befdc91cb8eb51ee77f75e4f6a8578770e Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Tue, 8 Oct 2019 09:39:18 +0200 Subject: [PATCH 04/10] Speed up LZSA2 compression --- src/shrink_block_v2.c | 37 ++++++++++++++++++++++++------------- 1 file changed, 24 insertions(+), 13 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index efad653..ec32542 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -221,6 +221,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne n++) { if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) { exists = 1; + break; } } @@ -254,30 +255,40 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne int nMatchOffset = match[m].offset; int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)); int nStartingMatchLen, k; + int nMaxRepLen[NMATCHES_PER_OFFSET]; if ((i + nMatchLen) > (nEndOffset - LAST_LITERALS)) nMatchLen = nEndOffset - LAST_LITERALS - i; - if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) - nStartingMatchLen = nMatchLen; - else - nStartingMatchLen = nMinMatchSize; for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { - const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost; int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; - int nMaxRepLen = 0; + int nCurMaxRepLen = 0; if (nMatchOffset != nRepOffset && nRepOffset && i >= nRepOffset && (i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) { - while (nMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nMaxRepLen] == pInWindow[i - nMatchOffset + nMaxRepLen]) - nMaxRepLen++; + while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i - nMatchOffset + nCurMaxRepLen]) + nCurMaxRepLen++; } - for (k = nStartingMatchLen; k <= nMatchLen; k++) { - int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2); - lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT]; + nMaxRepLen[j] = nCurMaxRepLen; + } + while (j < NMATCHES_PER_OFFSET) + nMaxRepLen[j++] = 0; + + if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) + nStartingMatchLen = nMatchLen; + else + nStartingMatchLen = nMinMatchSize; + + for (k = nStartingMatchLen; k <= nMatchLen; k++) { + int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2); + lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT]; + + for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { + const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost; + int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost; int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost; @@ -322,7 +333,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne * matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions * of identical bytes, for instance. Checking for this provides a big compression win on some files. */ - if (i >= nRepOffset && nMaxRepLen >= k) { + if (nMaxRepLen[j] >= k) { /* A match is possible at the rep offset; insert the extra coding choice. */ nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + /* rep match - no offset cost */ nMatchLenCost; @@ -411,7 +422,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un /* This command is a match, is followed by 'nNextLiterals' literals and then by another match */ if (nRepMatchOffset && pMatch->offset != nRepMatchOffset && (pBestMatch[nNextIndex].offset != pMatch->offset || pBestMatch[nNextIndex].offset == nRepMatchOffset || - ((pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16))) >= + ((pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16))) > ((pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16))))) { /* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when * matching large regions of identical bytes for instance, where there are too many offsets to be considered by the parser, and when not compressing to favor the From 44df8f3d2dc1e71ed092224e243cc50c89694b24 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Tue, 8 Oct 2019 16:23:33 +0200 Subject: [PATCH 05/10] Add early-out, speed LZSA2 compression up further --- src/shrink_block_v2.c | 200 ++++++++++++++++++++++-------------------- 1 file changed, 103 insertions(+), 97 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index ec32542..19232c4 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -215,35 +215,37 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if (!nFavorRatio && nNumLiterals == 1) nCodingChoiceCost += MODESWITCH_PENALTY; - int exists = 0; - for (n = 0; - n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost; - n++) { - if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) { - exists = 1; - break; - } - } - - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { - - if (pDestArrival->from_slot) { - memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n + 1], - &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + NMATCHES_PER_OFFSET - 1].cost) { + int exists = 0; + for (n = 0; + n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost; + n++) { + if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) { + exists = 1; + break; } + } - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = 0; - pDestArrival->match_len = 0; - pDestArrival->num_literals = nNumLiterals; - pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; - break; + for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n]; + if (pDestArrival->from_slot == 0 || + nCodingChoiceCost <= pDestArrival->cost) { + + if (pDestArrival->from_slot) { + memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n + 1], + &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = 0; + pDestArrival->match_len = 0; + pDestArrival->num_literals = nNumLiterals; + pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; + break; + } } } } @@ -291,83 +293,87 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost; - int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost; + int nRepCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost; + int nCodingChoiceCost = nRepCodingChoiceCost + nMatchOffsetCost; int exists = 0; if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals) nCodingChoiceCost += MODESWITCH_PENALTY; - for (n = 0; - n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost; - n++) { - if (pDestSlots[n].rep_offset == nMatchOffset) { - exists = 1; - break; - } - } - - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; - - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { - - if (pDestArrival->from_slot) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); - } - - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = nMatchOffset; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->rep_offset = nMatchOffset; - break; - } - } - - /* If this coding choice doesn't rep-match, see if we still get a match by using the current repmatch offset for this arrival. This can occur (and not have the - * matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions - * of identical bytes, for instance. Checking for this provides a big compression win on some files. */ - - if (nMaxRepLen[j] >= k) { - /* A match is possible at the rep offset; insert the extra coding choice. */ - - nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + /* rep match - no offset cost */ nMatchLenCost; - exists = 0; - - for (n = 0; - n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost; - n++) { - if (pDestSlots[n].rep_offset == nRepOffset) { - exists = 1; - break; - } - } - - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; - - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { - - if (pDestArrival->from_slot) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nRepCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + for (n = 0; + n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost; + n++) { + if (pDestSlots[n].rep_offset == nMatchOffset) { + exists = 1; + break; } + } - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = nRepOffset; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->rep_offset = nRepOffset; - break; + for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; + + if (pDestArrival->from_slot == 0 || + nCodingChoiceCost <= pDestArrival->cost) { + + if (pDestArrival->from_slot) { + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = nMatchOffset; + pDestArrival->match_len = k; + pDestArrival->num_literals = 0; + pDestArrival->rep_offset = nMatchOffset; + break; + } + } + } + + /* If this coding choice doesn't rep-match, see if we still get a match by using the current repmatch offset for this arrival. This can occur (and not have the + * matchfinder offer the offset in the first place, or have too many choices with the same cost to retain the repmatchable offset) when compressing regions + * of identical bytes, for instance. Checking for this provides a big compression win on some files. */ + + if (nMaxRepLen[j] >= k) { + /* A match is possible at the rep offset; insert the extra coding choice. */ + + exists = 0; + + for (n = 0; + n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nRepCodingChoiceCost; + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; + } + } + + for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; + + if (pDestArrival->from_slot == 0 || + nRepCodingChoiceCost <= pDestArrival->cost) { + + if (pDestArrival->from_slot) { + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nRepCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = nRepOffset; + pDestArrival->match_len = k; + pDestArrival->num_literals = 0; + pDestArrival->rep_offset = nRepOffset; + break; + } } } } From 03f841d04f13c73b7c2724834b3eb9135a663dad Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Tue, 8 Oct 2019 20:26:21 +0200 Subject: [PATCH 06/10] Speed up LZSA2 compression --- src/shrink_block_v2.c | 79 +++++++++++++++++++++++-------------------- 1 file changed, 42 insertions(+), 37 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index 19232c4..8214f1e 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -295,13 +295,14 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost; int nRepCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost; int nCodingChoiceCost = nRepCodingChoiceCost + nMatchOffsetCost; - int exists = 0; if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals) nCodingChoiceCost += MODESWITCH_PENALTY; if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nRepCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + int exists = 0; + for (n = 0; n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost; n++) { @@ -311,26 +312,28 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } } - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; + if (!exists) { + for (n = 0; n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { + if (pDestArrival->from_slot == 0 || + nCodingChoiceCost <= pDestArrival->cost) { - if (pDestArrival->from_slot) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + if (pDestArrival->from_slot) { + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = nMatchOffset; + pDestArrival->match_len = k; + pDestArrival->num_literals = 0; + pDestArrival->rep_offset = nMatchOffset; + break; } - - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = nMatchOffset; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->rep_offset = nMatchOffset; - break; } } } @@ -340,9 +343,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne * of identical bytes, for instance. Checking for this provides a big compression win on some files. */ if (nMaxRepLen[j] >= k) { - /* A match is possible at the rep offset; insert the extra coding choice. */ + int exists = 0; - exists = 0; + /* A match is possible at the rep offset; insert the extra coding choice. */ for (n = 0; n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nRepCodingChoiceCost; @@ -353,26 +356,28 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } } - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; + if (!exists) { + for (n = 0; n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; - if (pDestArrival->from_slot == 0 || - nRepCodingChoiceCost <= pDestArrival->cost) { + if (pDestArrival->from_slot == 0 || + nRepCodingChoiceCost <= pDestArrival->cost) { - if (pDestArrival->from_slot) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + if (pDestArrival->from_slot) { + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nRepCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = nRepOffset; + pDestArrival->match_len = k; + pDestArrival->num_literals = 0; + pDestArrival->rep_offset = nRepOffset; + break; } - - pDestArrival->cost = nRepCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = nRepOffset; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->rep_offset = nRepOffset; - break; } } } From b09dadb1c1bdf15220b72ba6e99f4abf19200199 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Wed, 9 Oct 2019 13:16:29 +0200 Subject: [PATCH 07/10] Small LZSA2 token count reduction --- src/shrink_block_v2.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index 8214f1e..a86f440 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -413,7 +413,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) { int i; int nNumLiterals = 0; + int nPrevRepMatchOffset = 0; int nRepMatchOffset = 0; + int nRepMatchLen = 0; + int nRepIndex = 0; int nDidReduce = 0; for (i = nStartOffset; i < nEndOffset; ) { @@ -502,6 +505,15 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un if (pBestMatch[nNextIndex].offset != nRepMatchOffset) nReducedCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16)); + int nReplaceRepOffset = 0; + if (nRepMatchOffset && nRepMatchOffset != nPrevRepMatchOffset && nRepMatchLen >= MIN_MATCH_SIZE_V2 && nRepMatchOffset != pBestMatch[nNextIndex].offset && nRepIndex >= pBestMatch[nNextIndex].offset && + (nRepIndex - pBestMatch[nNextIndex].offset + nRepMatchLen) <= (nEndOffset - LAST_LITERALS) && + !memcmp(pInWindow + nRepIndex - nRepMatchOffset, pInWindow + nRepIndex - pBestMatch[nNextIndex].offset, nRepMatchLen)) { + /* Replacing this match command by literals would let us create a repmatch */ + nReplaceRepOffset = 1; + nReducedCommandSize -= (nRepMatchOffset <= 32) ? 4 : ((nRepMatchOffset <= 512) ? 8 : ((nRepMatchOffset <= (8192 + 512)) ? 12 : 16)); + } + if (nOriginalCombinedCommandSize >= nReducedCommandSize) { /* Reduce */ int nMatchLen = pMatch->length; @@ -512,6 +524,11 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un } nDidReduce = 1; + + if (nReplaceRepOffset) { + pBestMatch[nRepIndex].offset = pBestMatch[nNextIndex].offset; + nRepMatchOffset = pBestMatch[nNextIndex].offset; + } continue; } } @@ -532,7 +549,10 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un continue; } + nPrevRepMatchOffset = nRepMatchOffset; nRepMatchOffset = pMatch->offset; + nRepMatchLen = pMatch->length; + nRepIndex = i; i += pMatch->length; nNumLiterals = 0; From 6ce846ff24397790ee11d385b4b91b68fa960399 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Wed, 9 Oct 2019 16:07:29 +0200 Subject: [PATCH 08/10] Speed up LZSA2 compression --- src/shrink_block_v2.c | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index a86f440..edddc10 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -192,13 +192,17 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT)); + for (i = (nStartOffset << MATCHES_PER_OFFSET_SHIFT); i != (nEndOffset << MATCHES_PER_OFFSET_SHIFT) - 1; i++) { + arrival[i].cost = 0x40000000; + } + arrival[nStartOffset << MATCHES_PER_OFFSET_SHIFT].from_slot = -1; for (i = nStartOffset; i != (nEndOffset - 1); i++) { int m, nMatches; for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { - int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost; + const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost & 0x3fffffff; int nCodingChoiceCost = nPrevCost + 8 /* literal */; int nNumLiterals = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals + 1; @@ -215,25 +219,25 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if (!nFavorRatio && nNumLiterals == 1) nCodingChoiceCost += MODESWITCH_PENALTY; - if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + NMATCHES_PER_OFFSET - 1].cost) { + lzsa_arrival *pDestSlots = &arrival[(i + 1) << MATCHES_PER_OFFSET_SHIFT]; + if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { int exists = 0; for (n = 0; - n < NMATCHES_PER_OFFSET && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].from_slot && arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].cost <= nCodingChoiceCost; + n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nCodingChoiceCost; n++) { - if (arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) { + if (pDestSlots[n].rep_offset == arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset) { exists = 1; break; } } for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; + if (nCodingChoiceCost <= pDestArrival->cost) { if (pDestArrival->from_slot) { - memmove(&arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n + 1], - &arrival[((i + 1) << MATCHES_PER_OFFSET_SHIFT) + n], + memmove(&pDestSlots[n + 1], + &pDestSlots[n], sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); } @@ -289,7 +293,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_OFFSET_SHIFT]; for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { - const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost; + const int nPrevCost = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].cost & 0x3fffffff; int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; int nMatchOffsetCost = (nMatchOffset == nRepOffset) ? 0 : nNoRepmatchOffsetCost; @@ -304,7 +308,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne int exists = 0; for (n = 0; - n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nCodingChoiceCost; + n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nCodingChoiceCost; n++) { if (pDestSlots[n].rep_offset == nMatchOffset) { exists = 1; @@ -316,9 +320,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne for (n = 0; n < NMATCHES_PER_OFFSET; n++) { lzsa_arrival *pDestArrival = &pDestSlots[n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost <= pDestArrival->cost) { - + if (nCodingChoiceCost <= pDestArrival->cost) { if (pDestArrival->from_slot) { memmove(&pDestSlots[n + 1], &pDestSlots[n], @@ -348,7 +350,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne /* A match is possible at the rep offset; insert the extra coding choice. */ for (n = 0; - n < NMATCHES_PER_OFFSET && pDestSlots[n].from_slot && pDestSlots[n].cost <= nRepCodingChoiceCost; + n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nRepCodingChoiceCost; n++) { if (pDestSlots[n].rep_offset == nRepOffset) { exists = 1; @@ -360,9 +362,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne for (n = 0; n < NMATCHES_PER_OFFSET; n++) { lzsa_arrival *pDestArrival = &pDestSlots[n]; - if (pDestArrival->from_slot == 0 || - nRepCodingChoiceCost <= pDestArrival->cost) { - + if (nRepCodingChoiceCost <= pDestArrival->cost) { if (pDestArrival->from_slot) { memmove(&pDestSlots[n + 1], &pDestSlots[n], From c1b18fb9fd1a043a44d6f821ea009b7a400d51d9 Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Wed, 9 Oct 2019 18:20:22 +0200 Subject: [PATCH 09/10] Implement -stats --- src/lzsa.c | 37 +++++++++++++++++++++++++++++++- src/shrink_block_v1.c | 45 +++++++++++++++++++++++++++++++++++++++ src/shrink_block_v2.c | 48 ++++++++++++++++++++++++++++++++++++++++++ src/shrink_context.c | 5 +++++ src/shrink_context.h | 30 ++++++++++++++++++++++++++ src/shrink_streaming.c | 12 ++++++++--- src/shrink_streaming.h | 7 ++++-- 7 files changed, 178 insertions(+), 6 deletions(-) diff --git a/src/lzsa.c b/src/lzsa.c index 0d85cf8..2574562 100755 --- a/src/lzsa.c +++ b/src/lzsa.c @@ -46,6 +46,7 @@ #define OPT_RAW 2 #define OPT_FAVOR_RATIO 4 #define OPT_RAW_BACKWARD 8 +#define OPT_STATS 16 #define TOOL_VERSION "1.1.0" @@ -104,6 +105,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co int nCommandCount = 0, nSafeDist = 0; int nFlags; lzsa_status_t nStatus; + lzsa_stats stats; nFlags = 0; if (nOptions & OPT_FAVOR_RATIO) @@ -117,7 +119,7 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co nStartTime = do_get_time(); } - nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist); + nStatus = lzsa_compress_file(pszInFilename, pszOutFilename, pszDictionaryFilename, nFlags, nMinMatchSize, nFormatVersion, compression_progress, &nOriginalSize, &nCompressedSize, &nCommandCount, &nSafeDist, &stats); if ((nOptions & OPT_VERBOSE)) { nEndTime = do_get_time(); @@ -149,6 +151,32 @@ static int do_compress(const char *pszInFilename, const char *pszOutFilename, co } } + if (nOptions & OPT_STATS) { + if (stats.literals_divisor > 0) + fprintf(stdout, "Literals: min: %d avg: %d max: %d count: %d\n", stats.min_literals, stats.total_literals / stats.literals_divisor, stats.max_literals, stats.literals_divisor); + else + fprintf(stdout, "Literals: none\n"); + if (stats.match_divisor > 0) { + fprintf(stdout, "Offsets: min: %d avg: %d max: %d reps: %d count: %d\n", stats.min_offset, stats.total_offsets / stats.match_divisor, stats.max_offset, stats.num_rep_offsets, stats.match_divisor); + fprintf(stdout, "Match lens: min: %d avg: %d max: %d count: %d\n", stats.min_match_len, stats.total_match_lens / stats.match_divisor, stats.max_match_len, stats.match_divisor); + } + else { + fprintf(stdout, "Offsets: none\n"); + fprintf(stdout, "Match lens: none\n"); + } + if (stats.rle1_divisor > 0) { + fprintf(stdout, "RLE1 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle1_len, stats.total_rle1_lens / stats.rle1_divisor, stats.max_rle1_len, stats.rle1_divisor); + } + else { + fprintf(stdout, "RLE1 lens: none\n"); + } + if (stats.rle2_divisor > 0) { + fprintf(stdout, "RLE2 lens: min: %d avg: %d max: %d count: %d\n", stats.min_rle2_len, stats.total_rle2_lens / stats.rle2_divisor, stats.max_rle2_len, stats.rle2_divisor); + } + else { + fprintf(stdout, "RLE2 lens: none\n"); + } + } return 0; } @@ -1009,6 +1037,13 @@ int main(int argc, char **argv) { else bArgsError = true; } + else if (!strcmp(argv[i], "-stats")) { + if ((nOptions & OPT_STATS) == 0) { + nOptions |= OPT_STATS; + } + else + bArgsError = true; + } else { if (!pszInFilename) pszInFilename = argv[i]; diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c index c785f70..1182c56 100644 --- a/src/shrink_block_v1.c +++ b/src/shrink_block_v1.c @@ -380,6 +380,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char pOutData[nOutOffset++] = nTokenLongOffset | (nTokenLiteralsLen << 4) | nTokenMatchLen; nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1) + pCompressor->stats.min_literals = nNumLiterals; + if (nNumLiterals > pCompressor->stats.max_literals) + pCompressor->stats.max_literals = nNumLiterals; + pCompressor->stats.total_literals += nNumLiterals; + pCompressor->stats.literals_divisor++; + if (nNumLiterals != 0) { memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); nOutOffset += nNumLiterals; @@ -391,6 +398,37 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char pOutData[nOutOffset++] = (-nMatchOffset) >> 8; } nOutOffset = lzsa_write_match_varlen_v1(pOutData, nOutOffset, nEncodedMatchLen); + + if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1) + pCompressor->stats.min_offset = nMatchOffset; + if (nMatchOffset > pCompressor->stats.max_offset) + pCompressor->stats.max_offset = nMatchOffset; + pCompressor->stats.total_offsets += nMatchOffset; + + if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1) + pCompressor->stats.min_match_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_match_len) + pCompressor->stats.max_match_len = nMatchLen; + pCompressor->stats.total_match_lens += nMatchLen; + pCompressor->stats.match_divisor++; + + if (nMatchOffset == 1) { + if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1) + pCompressor->stats.min_rle1_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_rle1_len) + pCompressor->stats.max_rle1_len = nMatchLen; + pCompressor->stats.total_rle1_lens += nMatchLen; + pCompressor->stats.rle1_divisor++; + } + else if (nMatchOffset == 2) { + if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1) + pCompressor->stats.min_rle2_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_rle2_len) + pCompressor->stats.max_rle2_len = nMatchLen; + pCompressor->stats.total_rle2_lens += nMatchLen; + pCompressor->stats.rle2_divisor++; + } + i += nMatchLen; if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { @@ -422,6 +460,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, const unsigned char pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00; nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals); + if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1) + pCompressor->stats.min_literals = nNumLiterals; + if (nNumLiterals > pCompressor->stats.max_literals) + pCompressor->stats.max_literals = nNumLiterals; + pCompressor->stats.total_literals += nNumLiterals; + pCompressor->stats.literals_divisor++; + if (nNumLiterals != 0) { memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); nOutOffset += nNumLiterals; diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index edddc10..9003e44 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -633,6 +633,13 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); if (nOutOffset < 0) return -1; + if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1) + pCompressor->stats.min_literals = nNumLiterals; + if (nNumLiterals > pCompressor->stats.max_literals) + pCompressor->stats.max_literals = nNumLiterals; + pCompressor->stats.total_literals += nNumLiterals; + pCompressor->stats.literals_divisor++; + if (nNumLiterals != 0) { memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); nOutOffset += nNumLiterals; @@ -655,11 +662,45 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa pOutData[nOutOffset++] = (-nMatchOffset) >> 8; pOutData[nOutOffset++] = (-nMatchOffset) & 0xff; } + + if (nMatchOffset == nRepMatchOffset) + pCompressor->stats.num_rep_offsets++; + nRepMatchOffset = nMatchOffset; nOutOffset = lzsa_write_match_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nEncodedMatchLen); if (nOutOffset < 0) return -1; + if (nMatchOffset < pCompressor->stats.min_offset || pCompressor->stats.min_offset == -1) + pCompressor->stats.min_offset = nMatchOffset; + if (nMatchOffset > pCompressor->stats.max_offset) + pCompressor->stats.max_offset = nMatchOffset; + pCompressor->stats.total_offsets += nMatchOffset; + + if (nMatchLen < pCompressor->stats.min_match_len || pCompressor->stats.min_match_len == -1) + pCompressor->stats.min_match_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_match_len) + pCompressor->stats.max_match_len = nMatchLen; + pCompressor->stats.total_match_lens += nMatchLen; + pCompressor->stats.match_divisor++; + + if (nMatchOffset == 1) { + if (nMatchLen < pCompressor->stats.min_rle1_len || pCompressor->stats.min_rle1_len == -1) + pCompressor->stats.min_rle1_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_rle1_len) + pCompressor->stats.max_rle1_len = nMatchLen; + pCompressor->stats.total_rle1_lens += nMatchLen; + pCompressor->stats.rle1_divisor++; + } + else if (nMatchOffset == 2) { + if (nMatchLen < pCompressor->stats.min_rle2_len || pCompressor->stats.min_rle2_len == -1) + pCompressor->stats.min_rle2_len = nMatchLen; + if (nMatchLen > pCompressor->stats.max_rle2_len) + pCompressor->stats.max_rle2_len = nMatchLen; + pCompressor->stats.total_rle2_lens += nMatchLen; + pCompressor->stats.rle2_divisor++; + } + i += nMatchLen; if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) { @@ -692,6 +733,13 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, lzsa_match *pBestMa nOutOffset = lzsa_write_literals_varlen_v2(pOutData, nOutOffset, nMaxOutDataSize, &nCurNibbleOffset, &nCurFreeNibbles, nNumLiterals); if (nOutOffset < 0) return -1; + if (nNumLiterals < pCompressor->stats.min_literals || pCompressor->stats.min_literals == -1) + pCompressor->stats.min_literals = nNumLiterals; + if (nNumLiterals > pCompressor->stats.max_literals) + pCompressor->stats.max_literals = nNumLiterals; + pCompressor->stats.total_literals += nNumLiterals; + pCompressor->stats.literals_divisor++; + if (nNumLiterals != 0) { memcpy(pOutData + nOutOffset, pInWindow + nInFirstLiteralOffset, nNumLiterals); nOutOffset += nNumLiterals; diff --git a/src/shrink_context.c b/src/shrink_context.c index 0acc7db..e1cc9c6 100644 --- a/src/shrink_context.c +++ b/src/shrink_context.c @@ -69,6 +69,11 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, pCompressor->flags = nFlags; pCompressor->safe_dist = 0; pCompressor->num_commands = 0; + + memset(&pCompressor->stats, 0, sizeof(pCompressor->stats)); + pCompressor->stats.min_literals = -1; + pCompressor->stats.min_match_len = -1; + pCompressor->stats.min_offset = -1; if (!nResult) { pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); diff --git a/src/shrink_context.h b/src/shrink_context.h index a63ddac..fd8176b 100644 --- a/src/shrink_context.h +++ b/src/shrink_context.h @@ -79,6 +79,35 @@ typedef struct { unsigned short match_len; } lzsa_arrival; +/** Compression statistics */ +typedef struct _lzsa_stats { + int min_literals; + int max_literals; + int total_literals; + + int min_offset; + int max_offset; + int num_rep_offsets; + int total_offsets; + + int min_match_len; + int max_match_len; + int total_match_lens; + + int min_rle1_len; + int max_rle1_len; + int total_rle1_lens; + + int min_rle2_len; + int max_rle2_len; + int total_rle2_lens; + + int literals_divisor; + int match_divisor; + int rle1_divisor; + int rle2_divisor; +} lzsa_stats; + /** Compression context */ typedef struct _lzsa_compressor { divsufsort_ctx_t divsufsort_context; @@ -93,6 +122,7 @@ typedef struct _lzsa_compressor { int safe_dist; int num_commands; lzsa_hashmap_t cost_map; + lzsa_stats stats; } lzsa_compressor; /** diff --git a/src/shrink_streaming.c b/src/shrink_streaming.c index 821f528..2e1cf12 100644 --- a/src/shrink_streaming.c +++ b/src/shrink_streaming.c @@ -71,11 +71,12 @@ static void lzsa_delete_file(const char *pszInFilename) { * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful + * @param pStats pointer to compression stats that are filled if this function is successful, or NULL * * @return LZSA_OK for success, or an error value from lzsa_status_t */ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist) { + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) { lzsa_stream_t inStream, outStream; void *pDictionaryData = NULL; int nDictionaryDataSize = 0; @@ -99,7 +100,7 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi return nStatus; } - nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist); + nStatus = lzsa_compress_stream(&inStream, &outStream, pDictionaryData, nDictionaryDataSize, nFlags, nMinMatchSize, nFormatVersion, progress, pOriginalSize, pCompressedSize, pCommandCount, pSafeDist, pStats); lzsa_dictionary_free(&pDictionaryData); outStream.close(&outStream); @@ -129,12 +130,13 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful + * @param pStats pointer to compression stats that are filled if this function is successful, or NULL * * @return LZSA_OK for success, or an error value from lzsa_status_t */ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist) { + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats) { unsigned char *pInData, *pOutData; lzsa_compressor compressor; long long nOriginalSize = 0LL, nCompressedSize = 0LL; @@ -289,6 +291,10 @@ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOut int nCommandCount = lzsa_compressor_get_command_count(&compressor); int nSafeDist = compressor.safe_dist; + + if (pStats) + *pStats = compressor.stats; + lzsa_compressor_destroy(&compressor); free(pOutData); diff --git a/src/shrink_streaming.h b/src/shrink_streaming.h index 1e66bdc..0920edf 100644 --- a/src/shrink_streaming.h +++ b/src/shrink_streaming.h @@ -41,6 +41,7 @@ extern "C" { /* Forward declaration */ typedef enum _lzsa_status_t lzsa_status_t; +typedef struct _lzsa_stats lzsa_stats; /*-------------- File API -------------- */ @@ -58,12 +59,13 @@ typedef enum _lzsa_status_t lzsa_status_t; * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful + * @param pStats pointer to compression stats that are filled if this function is successful, or NULL * * @return LZSA_OK for success, or an error value from lzsa_status_t */ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist); + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats); /*-------------- Streaming API -------------- */ @@ -82,12 +84,13 @@ lzsa_status_t lzsa_compress_file(const char *pszInFilename, const char *pszOutFi * @param pCompressedSize pointer to returned output(compressed) size, updated when this function is successful * @param pCommandCount pointer to returned token(compression commands) count, updated when this function is successful * @param pSafeDist pointer to return safe distance for raw blocks, updated when this function is successful + * @param pStats pointer to compression stats that are filled if this function is successful, or NULL * * @return LZSA_OK for success, or an error value from lzsa_status_t */ lzsa_status_t lzsa_compress_stream(lzsa_stream_t *pInStream, lzsa_stream_t *pOutStream, const void *pDictionaryData, int nDictionaryDataSize, const unsigned int nFlags, const int nMinMatchSize, const int nFormatVersion, - void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist); + void(*progress)(long long nOriginalSize, long long nCompressedSize), long long *pOriginalSize, long long *pCompressedSize, int *pCommandCount, int *pSafeDist, lzsa_stats *pStats); #ifdef __cplusplus } From d209b73a307cc5b743b91c877c409158cdd4ca1b Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Thu, 10 Oct 2019 14:42:08 +0200 Subject: [PATCH 10/10] Fix small bug --- src/shrink_block_v2.c | 42 ++++++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 20 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index 9003e44..96452c4 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -192,7 +192,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne memset(arrival + (nStartOffset << MATCHES_PER_OFFSET_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset) << MATCHES_PER_OFFSET_SHIFT)); - for (i = (nStartOffset << MATCHES_PER_OFFSET_SHIFT); i != (nEndOffset << MATCHES_PER_OFFSET_SHIFT) - 1; i++) { + for (i = (nStartOffset << MATCHES_PER_OFFSET_SHIFT); i != (nEndOffset << MATCHES_PER_OFFSET_SHIFT); i++) { arrival[i].cost = 0x40000000; } @@ -220,7 +220,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne nCodingChoiceCost += MODESWITCH_PENALTY; lzsa_arrival *pDestSlots = &arrival[(i + 1) << MATCHES_PER_OFFSET_SHIFT]; - if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + if (nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { int exists = 0; for (n = 0; n < NMATCHES_PER_OFFSET && pDestSlots[n].cost <= nCodingChoiceCost; @@ -231,24 +231,26 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } } - for (n = 0; !exists && n < NMATCHES_PER_OFFSET; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; - if (nCodingChoiceCost <= pDestArrival->cost) { + if (!exists) { + for (n = 0; n < NMATCHES_PER_OFFSET; n++) { + lzsa_arrival *pDestArrival = &pDestSlots[n]; + if (nCodingChoiceCost <= pDestArrival->cost) { - if (pDestArrival->from_slot) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + if (pDestArrival->from_slot) { + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (NMATCHES_PER_OFFSET - n - 1)); + } + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->from_pos = i; + pDestArrival->from_slot = j + 1; + pDestArrival->match_offset = 0; + pDestArrival->match_len = 0; + pDestArrival->num_literals = nNumLiterals; + pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; + break; } - - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_offset = 0; - pDestArrival->match_len = 0; - pDestArrival->num_literals = nNumLiterals; - pDestArrival->rep_offset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; - break; } } } @@ -303,8 +305,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if (!nFavorRatio && !arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].num_literals) nCodingChoiceCost += MODESWITCH_PENALTY; - if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nRepCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { - if (pDestSlots[NMATCHES_PER_OFFSET - 1].from_slot == 0 || nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + if (nRepCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { + if (nCodingChoiceCost <= pDestSlots[NMATCHES_PER_OFFSET - 1].cost) { int exists = 0; for (n = 0;