From 06e6a1487159027b277fac83f8a8733283f9fec7 Mon Sep 17 00:00:00 2001 From: emmanuel-marty Date: Fri, 5 Apr 2019 16:32:11 +0200 Subject: [PATCH] Add optimization pass to reduce the number of command tokens in the compressed data blocks without changing the compression ratio --- src/main.c | 4 ++-- src/shrink.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/shrink.h | 8 +++++++ 3 files changed, 70 insertions(+), 2 deletions(-) diff --git a/src/main.c b/src/main.c index e6f542b..dd4b243 100755 --- a/src/main.c +++ b/src/main.c @@ -239,8 +239,8 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0; double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta; - fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %lld into %lld bytes ==> %g %%\n", - pszInFilename, fDelta, fSpeed, nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); + fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens, %lld into %lld bytes ==> %g %%\n", + pszInFilename, fDelta, fSpeed, lzsa_compressor_get_command_count(&compressor), nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize)); } lzsa_compressor_destroy(&compressor); diff --git a/src/shrink.c b/src/shrink.c index 97fbe23..00d0bc3 100755 --- a/src/shrink.c +++ b/src/shrink.c @@ -66,6 +66,7 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) pCompressor->pos_data = NULL; pCompressor->open_intervals = NULL; pCompressor->match = NULL; + pCompressor->num_commands = 0; if (pCompressor->intervals) { pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); @@ -558,6 +559,51 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart } } +/** + * Attempt to minimize the number of commands issued in the compressed data block, in order to speed up decompression without + * 
impacting the compression ratio + * + * @param pCompressor compression context + * @param nStartOffset current offset in input window (typically the number of previously compressed bytes) + * @param nEndOffset offset to stop optimizing commands at (typically the size of the total input window in bytes) + */ +static void lzsa_optimize_command_count(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) { + int i; + int nNumLiterals = 0; + + for (i = nStartOffset; i < nEndOffset; ) { + lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT); + + if (pMatch->length >= MIN_MATCH_SIZE) { + int nMatchOffset = pMatch->offset; + int nMatchLen = pMatch->length; + int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE; + int nNibbleLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x01; + int nTokenSize = 1 /* nibble */ + lzsa_get_literals_varlen_size(nNumLiterals) + (nNibbleLongOffset ? 2 : 1) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen); + + if ((((nNumLiterals + nMatchLen) < LITERALS_RUN_LEN && nTokenSize >= nMatchLen) || + ((nNumLiterals + nMatchLen) < (LITERALS_RUN_LEN + 254) && nTokenSize >= (nMatchLen + 1))) && + (i + nMatchLen) < nEndOffset && pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE) { + int j; + + for (j = 0; j < nMatchLen; j++) { + pCompressor->match[(i + j) << MATCHES_PER_OFFSET_SHIFT].length = 0; /* drop the match: these bytes are counted as literals below */ + } + nNumLiterals += nMatchLen; + i += nMatchLen; + } + else { + nNumLiterals = 0; /* match kept: the literal run restarts after it */ + i += nMatchLen; + } + } + else { + nNumLiterals++; + i++; + } + } +} + /** * Emit block of compressed data * @@ -611,6 +657,8 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p } nOutOffset = lzsa_write_match_varlen(pOutData, nOutOffset, nEncodedMatchLen); i += nMatchLen; + + pCompressor->num_commands++; } else { if (nNumLiterals == 0) @@ -635,6 +683,8 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p nOutOffset += nNumLiterals; 
nNumLiterals = 0; } + + pCompressor->num_commands++; } return nOutOffset; @@ -659,6 +709,16 @@ int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWind } lzsa_find_all_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); lzsa_optimize_matches(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); + lzsa_optimize_command_count(pCompressor, nPreviousBlockSize, nPreviousBlockSize + nInDataSize); return lzsa_write_block(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize); } + +/** + * Get the number of compression commands issued in compressed data blocks + * + * @return number of commands counted so far in the num_commands field of pCompressor + */ +int lzsa_compressor_get_command_count(lsza_compressor *pCompressor) { + return pCompressor->num_commands; +} diff --git a/src/shrink.h b/src/shrink.h index fbb48b5..ae6063a 100755 --- a/src/shrink.h +++ b/src/shrink.h @@ -32,6 +32,7 @@ typedef struct { unsigned int *pos_data; unsigned int *open_intervals; lzsa_match *match; + int num_commands; } lsza_compressor; /** @@ -65,4 +66,11 @@ void lzsa_compressor_destroy(lsza_compressor *pCompressor); */ int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize); +/** + * Get the number of compression commands issued in compressed data blocks + * + * @return number of commands counted so far in the num_commands field of pCompressor + */ +int lzsa_compressor_get_command_count(lsza_compressor *pCompressor); + #endif /* _SHRINK_H */