When using -m, decompress ~7% faster by trading ~0.5% of compression ratio

This commit is contained in:
emmanuel-marty 2019-05-02 11:23:57 +02:00
parent 97dd3ffc1f
commit ef6b43a296
3 changed files with 50 additions and 20 deletions

View File

@ -34,8 +34,9 @@
#include "expand.h"
#define BLOCK_SIZE 65536
#define OPT_VERBOSE 1
#define OPT_RAW 2
/* Command-line option bits, OR-ed together into nOptions */
#define OPT_VERBOSE 1
#define OPT_RAW 2 /* raw block format; forwarded to the compressor as LZSA_FLAG_RAW_BLOCK */
#define OPT_FAVOR_RATIO 4 /* on by default; cleared by -m and --prefer-speed; forwarded as LZSA_FLAG_FAVOR_RATIO */
/*---------------------------------------------------------------------------*/
@ -64,6 +65,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
lsza_compressor compressor;
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
int nFlags;
int nResult;
bool bError = false;
@ -108,7 +110,12 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
}
memset(pOutData, 0, BLOCK_SIZE);
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize);
nFlags = 0;
if (nOptions & OPT_FAVOR_RATIO)
nFlags |= LZSA_FLAG_FAVOR_RATIO;
if (nOptions & OPT_RAW)
nFlags |= LZSA_FLAG_RAW_BLOCK;
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags);
if (nResult != 0) {
free(pOutData);
pOutData = NULL;
@ -221,6 +228,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
fflush(stdout);
}
}
@ -228,11 +236,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
int nFooterSize;
if ((nOptions & OPT_RAW) != 0) {
cFooter[0] = 0x00; /* EOD marker for raw block */
cFooter[1] = 0xee;
cFooter[2] = 0x00;
cFooter[3] = 0x00;
nFooterSize = 4;
nFooterSize = 0;
}
else {
cFooter[0] = 0x00; /* EOD frame */
@ -254,7 +258,6 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
fflush(stdout);
}
lzsa_compressor_destroy(&compressor);
@ -698,7 +701,7 @@ int main(int argc, char **argv) {
bool bMinMatchDefined = false;
char cCommand = 'z';
int nMinMatchSize = MIN_MATCH_SIZE;
unsigned int nOptions = 0;
unsigned int nOptions = OPT_FAVOR_RATIO;
for (i = 1; i < argc; i++) {
if (!strcmp(argv[i], "-d")) {
@ -731,6 +734,7 @@ int main(int argc, char **argv) {
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
i++;
bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO);
}
else {
bArgsError = true;
@ -745,6 +749,7 @@ int main(int argc, char **argv) {
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
bMinMatchDefined = true;
nOptions &= (~OPT_FAVOR_RATIO);
}
else {
bArgsError = true;
@ -763,7 +768,8 @@ int main(int argc, char **argv) {
}
else if (!strcmp(argv[i], "--prefer-speed")) {
if (!bMinMatchDefined) {
nMinMatchSize = 4;
nMinMatchSize = 3;
nOptions &= (~OPT_FAVOR_RATIO);
bMinMatchDefined = true;
}
else
@ -802,8 +808,8 @@ int main(int argc, char **argv) {
fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default, same as -m 3)\n");
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m 4)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
return 100;
}

View File

@ -63,10 +63,11 @@ typedef struct _lzsa_match {
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFlags compression flags (0, or a bitwise OR of LZSA_FLAG_FAVOR_RATIO and LZSA_FLAG_RAW_BLOCK)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize) {
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags) {
int nResult;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
@ -79,6 +80,7 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->min_match_size = MIN_MATCH_SIZE;
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
pCompressor->flags = nFlags;
pCompressor->num_commands = 0;
if (!nResult) {
@ -503,6 +505,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
int *cost = (int*)pCompressor->pos_data; /* Reuse */
int nLastLiteralsOffset;
int nMinMatchSize = pCompressor->min_match_size;
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
int i;
cost[nEndOffset - 1] = 8;
@ -541,7 +544,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost >= nCurCost) {
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = nMatchLen;
nBestMatchOffset = pMatch[m].offset;
@ -566,7 +569,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost >= nCurCost) {
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
@ -581,7 +584,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
nCurCost += MODESWITCH_PENALTY;
if (nBestCost >= nCurCost) {
if (nBestCost > (nCurCost - nFavorRatio)) {
nBestCost = nCurCost;
nBestMatchLen = k;
nBestMatchOffset = pMatch[m].offset;
@ -621,7 +624,7 @@ static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int n
int nMatchLen = pMatch->length;
int nReduce = 0;
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <FF> <ll> <ll> <offset> <offset> <FF> <mm> <mm> */ {
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
int nMatchOffset = pMatch->offset;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
@ -756,7 +759,10 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
else
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
if (nNumLiterals != 0) {
@ -768,6 +774,18 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
pCompressor->num_commands++;
}
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
/* Emit EOD marker for raw block */
if ((nOutOffset + 4) > nMaxOutDataSize)
return -1;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 238;
pOutData[nOutOffset++] = 0;
pOutData[nOutOffset++] = 0;
}
return nOutOffset;
}

View File

@ -25,6 +25,10 @@
#include "divsufsort.h"
/* Compression flags: bit values that are OR-ed together and passed as nFlags to lzsa_compressor_init() */
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit a raw block; the block writer then appends the 4-byte end-of-data marker itself */
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
@ -36,6 +40,7 @@ typedef struct {
unsigned int *open_intervals;
lzsa_match *match;
int min_match_size;
int flags;
int num_commands;
} lsza_compressor;
@ -45,10 +50,11 @@ typedef struct {
* @param pCompressor compression context to initialize
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
* @param nFlags compression flags (0, or a bitwise OR of LZSA_FLAG_FAVOR_RATIO and LZSA_FLAG_RAW_BLOCK)
*
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize);
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags);
/**
* Clean up compression context and free up any associated resources