mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-01-11 12:30:24 +00:00
When using -m, decompress ~7% faster by trading ~0.5% of compression ratio
This commit is contained in:
parent
97dd3ffc1f
commit
ef6b43a296
32
src/main.c
32
src/main.c
@ -34,8 +34,9 @@
|
||||
#include "expand.h"
|
||||
|
||||
#define BLOCK_SIZE 65536
|
||||
#define OPT_VERBOSE 1
|
||||
#define OPT_RAW 2
|
||||
#define OPT_VERBOSE 1
|
||||
#define OPT_RAW 2
|
||||
#define OPT_FAVOR_RATIO 4
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
@ -64,6 +65,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
lsza_compressor compressor;
|
||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||
int nFlags;
|
||||
int nResult;
|
||||
bool bError = false;
|
||||
|
||||
@ -108,7 +110,12 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
}
|
||||
memset(pOutData, 0, BLOCK_SIZE);
|
||||
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize);
|
||||
nFlags = 0;
|
||||
if (nOptions & OPT_FAVOR_RATIO)
|
||||
nFlags |= LZSA_FLAG_FAVOR_RATIO;
|
||||
if (nOptions & OPT_RAW)
|
||||
nFlags |= LZSA_FLAG_RAW_BLOCK;
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags);
|
||||
if (nResult != 0) {
|
||||
free(pOutData);
|
||||
pOutData = NULL;
|
||||
@ -221,6 +228,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
|
||||
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
|
||||
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
||||
fflush(stdout);
|
||||
}
|
||||
}
|
||||
|
||||
@ -228,11 +236,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
int nFooterSize;
|
||||
|
||||
if ((nOptions & OPT_RAW) != 0) {
|
||||
cFooter[0] = 0x00; /* EOD marker for raw block */
|
||||
cFooter[1] = 0xee;
|
||||
cFooter[2] = 0x00;
|
||||
cFooter[3] = 0x00;
|
||||
nFooterSize = 4;
|
||||
nFooterSize = 0;
|
||||
}
|
||||
else {
|
||||
cFooter[0] = 0x00; /* EOD frame */
|
||||
@ -254,7 +258,6 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
|
||||
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
|
||||
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
||||
fflush(stdout);
|
||||
}
|
||||
|
||||
lzsa_compressor_destroy(&compressor);
|
||||
@ -698,7 +701,7 @@ int main(int argc, char **argv) {
|
||||
bool bMinMatchDefined = false;
|
||||
char cCommand = 'z';
|
||||
int nMinMatchSize = MIN_MATCH_SIZE;
|
||||
unsigned int nOptions = 0;
|
||||
unsigned int nOptions = OPT_FAVOR_RATIO;
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
if (!strcmp(argv[i], "-d")) {
|
||||
@ -731,6 +734,7 @@ int main(int argc, char **argv) {
|
||||
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||
i++;
|
||||
bMinMatchDefined = true;
|
||||
nOptions &= (~OPT_FAVOR_RATIO);
|
||||
}
|
||||
else {
|
||||
bArgsError = true;
|
||||
@ -745,6 +749,7 @@ int main(int argc, char **argv) {
|
||||
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
|
||||
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||
bMinMatchDefined = true;
|
||||
nOptions &= (~OPT_FAVOR_RATIO);
|
||||
}
|
||||
else {
|
||||
bArgsError = true;
|
||||
@ -763,7 +768,8 @@ int main(int argc, char **argv) {
|
||||
}
|
||||
else if (!strcmp(argv[i], "--prefer-speed")) {
|
||||
if (!bMinMatchDefined) {
|
||||
nMinMatchSize = 4;
|
||||
nMinMatchSize = 3;
|
||||
nOptions &= (~OPT_FAVOR_RATIO);
|
||||
bMinMatchDefined = true;
|
||||
}
|
||||
else
|
||||
@ -802,8 +808,8 @@ int main(int argc, char **argv) {
|
||||
fprintf(stderr, " -v: be verbose\n");
|
||||
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
|
||||
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
|
||||
fprintf(stderr, " --prefer-ratio: favor compression ratio (default, same as -m 3)\n");
|
||||
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m 4)\n");
|
||||
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
|
||||
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
|
||||
return 100;
|
||||
}
|
||||
|
||||
|
30
src/shrink.c
30
src/shrink.c
@ -63,10 +63,11 @@ typedef struct _lzsa_match {
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||
* @param nFlags compression flags
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize) {
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags) {
|
||||
int nResult;
|
||||
|
||||
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
||||
@ -79,6 +80,7 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize,
|
||||
pCompressor->min_match_size = MIN_MATCH_SIZE;
|
||||
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
|
||||
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
|
||||
pCompressor->flags = nFlags;
|
||||
pCompressor->num_commands = 0;
|
||||
|
||||
if (!nResult) {
|
||||
@ -503,6 +505,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
||||
int nLastLiteralsOffset;
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
|
||||
int i;
|
||||
|
||||
cost[nEndOffset - 1] = 8;
|
||||
@ -541,7 +544,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost >= nCurCost) {
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = nMatchLen;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
@ -566,7 +569,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost >= nCurCost) {
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
@ -581,7 +584,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||
nCurCost += MODESWITCH_PENALTY;
|
||||
|
||||
if (nBestCost >= nCurCost) {
|
||||
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||
nBestCost = nCurCost;
|
||||
nBestMatchLen = k;
|
||||
nBestMatchOffset = pMatch[m].offset;
|
||||
@ -621,7 +624,7 @@ static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int n
|
||||
int nMatchLen = pMatch->length;
|
||||
int nReduce = 0;
|
||||
|
||||
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <FF> <ll> <ll> <offset> <offset> <FF> <mm> <mm> */ {
|
||||
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducable command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
|
||||
int nMatchOffset = pMatch->offset;
|
||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
|
||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
|
||||
@ -756,7 +759,10 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
||||
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
|
||||
else
|
||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
|
||||
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
|
||||
|
||||
if (nNumLiterals != 0) {
|
||||
@ -768,6 +774,18 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
||||
pCompressor->num_commands++;
|
||||
}
|
||||
|
||||
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||
/* Emit EOD marker for raw block */
|
||||
|
||||
if ((nOutOffset + 4) > nMaxOutDataSize)
|
||||
return -1;
|
||||
|
||||
pOutData[nOutOffset++] = 0;
|
||||
pOutData[nOutOffset++] = 238;
|
||||
pOutData[nOutOffset++] = 0;
|
||||
pOutData[nOutOffset++] = 0;
|
||||
}
|
||||
|
||||
return nOutOffset;
|
||||
}
|
||||
|
||||
|
@ -25,6 +25,10 @@
|
||||
|
||||
#include "divsufsort.h"
|
||||
|
||||
/* Compression flags */
|
||||
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
|
||||
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
|
||||
|
||||
/* Forward declarations */
|
||||
typedef struct _lzsa_match lzsa_match;
|
||||
|
||||
@ -36,6 +40,7 @@ typedef struct {
|
||||
unsigned int *open_intervals;
|
||||
lzsa_match *match;
|
||||
int min_match_size;
|
||||
int flags;
|
||||
int num_commands;
|
||||
} lsza_compressor;
|
||||
|
||||
@ -45,10 +50,11 @@ typedef struct {
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||
* @param nFlags compression flags
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize);
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags);
|
||||
|
||||
/**
|
||||
* Clean up compression context and free up any associated resources
|
||||
|
Loading…
x
Reference in New Issue
Block a user