From 3c4f535e0b36a5ca21929e70edde8c5c4db5908c Mon Sep 17 00:00:00 2001 From: Emmanuel Marty Date: Sun, 27 Oct 2019 14:55:39 +0100 Subject: [PATCH] Increase LZSA2 ratio by ~0.02% --- src/shrink_block_v2.c | 85 ++++++++++++++++++++++++++++--------------- 1 file changed, 56 insertions(+), 29 deletions(-) diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index 73865ed..289ba75 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -174,10 +174,64 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf return nOutOffset; } +/** + * Insert forward rep candidates: for every arrival recorded at position i whose rep offset is non-zero and differs from nMatchOffset, add nMatchOffset as a match candidate at that arrival's rep position when the rep_len bytes reached through nMatchOffset equal those reached through the arrival's own rep offset, then repeat from that rep position (nDepth is the current nesting level; the function returns immediately once it reaches 10) + * + * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) + * @param i input data window position whose matches are being considered + * @param nMatchOffset match offset to use as rep candidate + * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes) + */ +static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nEndOffset, int nDepth) { + lzsa_arrival *arrival = pCompressor->arrival; + int j; + + if (nDepth >= 10) return; /* give up once the recursion is 10 levels deep */ + + for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { + int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; + + if (nMatchOffset != nRepOffset && nRepOffset && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_len >= MIN_MATCH_SIZE_V2) { + int nRepPos = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_pos; + int nRepLen = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_len; + + if (nRepPos > nMatchOffset && + (nRepPos - nMatchOffset + nRepLen) <= (nEndOffset - LAST_LITERALS) && + !memcmp(pInWindow + nRepPos - nRepOffset, pInWindow + nRepPos - nMatchOffset, nRepLen)) { + + lzsa_match *fwd_match = pCompressor->match + (nRepPos << 5); /* candidates for nRepPos: 32 entries per position */ + int exists 
= 0; + int r; + + for (r = 0; r < 32 && fwd_match[r].length >= MIN_MATCH_SIZE_V2; r++) { + if (fwd_match[r].offset == nMatchOffset) { + exists = 1; + + if (fwd_match[r].length < nRepLen) { /* offset already listed but shorter: lengthen it and propagate again */ + fwd_match[r].length = nRepLen; + lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nEndOffset, nDepth + 1); + } + break; + } + } + + if (!exists && r < 32) { /* offset not listed and entry r holds no match of at least MIN_MATCH_SIZE_V2 */ + fwd_match[r].offset = nMatchOffset; + fwd_match[r].length = nRepLen; + + lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nEndOffset, nDepth + 1); + } + } + } + } +} + /** * Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input * * @param pCompressor compression context + * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress) * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes * @param nInsertForwardReps non-zero to insert forward repmatch candidates, zero to use the previously inserted candidates @@ -289,35 +343,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne while (j < NMATCHES_PER_OFFSET) nMaxRepLen[j++] = 0; - for (j = 0; j < NMATCHES_PER_OFFSET && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].from_slot; j++) { - int nRepOffset = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_offset; - - if (nMatchOffset != nRepOffset && nRepOffset && nInsertForwardReps && arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_len >= MIN_MATCH_SIZE_V2) { - int nRepPos = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_pos; - int nRepLen = arrival[(i << MATCHES_PER_OFFSET_SHIFT) + j].rep_len; - - if (nRepPos > nMatchOffset && - (nRepPos - nMatchOffset + nRepLen) <= (nEndOffset - LAST_LITERALS) && - !memcmp(pInWindow + nRepPos - nRepOffset, pInWindow + 
nRepPos - nMatchOffset, nRepLen)) { - - lzsa_match *fwd_match = pCompressor->match + (nRepPos << 5); - int exists = 0; - int r; - - for (r = 0; r < 32 && fwd_match[r].length >= MIN_MATCH_SIZE_V2; r++) { - if (fwd_match[r].offset == nMatchOffset) { - exists = 1; - break; - } - } - - if (!exists && r < 32) { - fwd_match[r].offset = nMatchOffset; - fwd_match[r].length = nRepLen; - } - } - } - } + if (nInsertForwardReps) + lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nEndOffset, 0); if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) nStartingMatchLen = nMatchLen;