mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-02-28 07:29:21 +00:00
When using -m, decompress ~7% faster by trading ~0.5% of compression ratio
This commit is contained in:
parent
97dd3ffc1f
commit
ef6b43a296
32
src/main.c
32
src/main.c
@ -34,8 +34,9 @@
|
|||||||
#include "expand.h"
|
#include "expand.h"
|
||||||
|
|
||||||
#define BLOCK_SIZE 65536
|
#define BLOCK_SIZE 65536
|
||||||
#define OPT_VERBOSE 1
|
#define OPT_VERBOSE 1
|
||||||
#define OPT_RAW 2
|
#define OPT_RAW 2
|
||||||
|
#define OPT_FAVOR_RATIO 4
|
||||||
|
|
||||||
/*---------------------------------------------------------------------------*/
|
/*---------------------------------------------------------------------------*/
|
||||||
|
|
||||||
@ -64,6 +65,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
|||||||
lsza_compressor compressor;
|
lsza_compressor compressor;
|
||||||
long long nStartTime = 0LL, nEndTime = 0LL;
|
long long nStartTime = 0LL, nEndTime = 0LL;
|
||||||
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
long long nOriginalSize = 0LL, nCompressedSize = 0LL;
|
||||||
|
int nFlags;
|
||||||
int nResult;
|
int nResult;
|
||||||
bool bError = false;
|
bool bError = false;
|
||||||
|
|
||||||
@ -108,7 +110,12 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
|||||||
}
|
}
|
||||||
memset(pOutData, 0, BLOCK_SIZE);
|
memset(pOutData, 0, BLOCK_SIZE);
|
||||||
|
|
||||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize);
|
nFlags = 0;
|
||||||
|
if (nOptions & OPT_FAVOR_RATIO)
|
||||||
|
nFlags |= LZSA_FLAG_FAVOR_RATIO;
|
||||||
|
if (nOptions & OPT_RAW)
|
||||||
|
nFlags |= LZSA_FLAG_RAW_BLOCK;
|
||||||
|
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize, nFlags);
|
||||||
if (nResult != 0) {
|
if (nResult != 0) {
|
||||||
free(pOutData);
|
free(pOutData);
|
||||||
pOutData = NULL;
|
pOutData = NULL;
|
||||||
@ -221,6 +228,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
|||||||
|
|
||||||
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
|
if (!bError && !feof(f_in) && nOriginalSize >= 1024 * 1024) {
|
||||||
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
fprintf(stdout, "\r%lld => %lld (%g %%)", nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
||||||
|
fflush(stdout);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -228,11 +236,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
|||||||
int nFooterSize;
|
int nFooterSize;
|
||||||
|
|
||||||
if ((nOptions & OPT_RAW) != 0) {
|
if ((nOptions & OPT_RAW) != 0) {
|
||||||
cFooter[0] = 0x00; /* EOD marker for raw block */
|
nFooterSize = 0;
|
||||||
cFooter[1] = 0xee;
|
|
||||||
cFooter[2] = 0x00;
|
|
||||||
cFooter[3] = 0x00;
|
|
||||||
nFooterSize = 4;
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
cFooter[0] = 0x00; /* EOD frame */
|
cFooter[0] = 0x00; /* EOD frame */
|
||||||
@ -254,7 +258,6 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
|||||||
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
|
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
|
||||||
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
|
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
|
||||||
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
||||||
fflush(stdout);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
lzsa_compressor_destroy(&compressor);
|
lzsa_compressor_destroy(&compressor);
|
||||||
@ -698,7 +701,7 @@ int main(int argc, char **argv) {
|
|||||||
bool bMinMatchDefined = false;
|
bool bMinMatchDefined = false;
|
||||||
char cCommand = 'z';
|
char cCommand = 'z';
|
||||||
int nMinMatchSize = MIN_MATCH_SIZE;
|
int nMinMatchSize = MIN_MATCH_SIZE;
|
||||||
unsigned int nOptions = 0;
|
unsigned int nOptions = OPT_FAVOR_RATIO;
|
||||||
|
|
||||||
for (i = 1; i < argc; i++) {
|
for (i = 1; i < argc; i++) {
|
||||||
if (!strcmp(argv[i], "-d")) {
|
if (!strcmp(argv[i], "-d")) {
|
||||||
@ -731,6 +734,7 @@ int main(int argc, char **argv) {
|
|||||||
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||||
i++;
|
i++;
|
||||||
bMinMatchDefined = true;
|
bMinMatchDefined = true;
|
||||||
|
nOptions &= (~OPT_FAVOR_RATIO);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
bArgsError = true;
|
bArgsError = true;
|
||||||
@ -745,6 +749,7 @@ int main(int argc, char **argv) {
|
|||||||
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
|
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
|
||||||
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||||
bMinMatchDefined = true;
|
bMinMatchDefined = true;
|
||||||
|
nOptions &= (~OPT_FAVOR_RATIO);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
bArgsError = true;
|
bArgsError = true;
|
||||||
@ -763,7 +768,8 @@ int main(int argc, char **argv) {
|
|||||||
}
|
}
|
||||||
else if (!strcmp(argv[i], "--prefer-speed")) {
|
else if (!strcmp(argv[i], "--prefer-speed")) {
|
||||||
if (!bMinMatchDefined) {
|
if (!bMinMatchDefined) {
|
||||||
nMinMatchSize = 4;
|
nMinMatchSize = 3;
|
||||||
|
nOptions &= (~OPT_FAVOR_RATIO);
|
||||||
bMinMatchDefined = true;
|
bMinMatchDefined = true;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
@ -802,8 +808,8 @@ int main(int argc, char **argv) {
|
|||||||
fprintf(stderr, " -v: be verbose\n");
|
fprintf(stderr, " -v: be verbose\n");
|
||||||
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
|
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
|
||||||
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
|
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
|
||||||
fprintf(stderr, " --prefer-ratio: favor compression ratio (default, same as -m 3)\n");
|
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
|
||||||
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m 4)\n");
|
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
|
||||||
return 100;
|
return 100;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
30
src/shrink.c
30
src/shrink.c
@ -63,10 +63,11 @@ typedef struct _lzsa_match {
|
|||||||
* @param pCompressor compression context to initialize
|
* @param pCompressor compression context to initialize
|
||||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||||
|
* @param nFlags compression flags
|
||||||
*
|
*
|
||||||
* @return 0 for success, non-zero for failure
|
* @return 0 for success, non-zero for failure
|
||||||
*/
|
*/
|
||||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize) {
|
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags) {
|
||||||
int nResult;
|
int nResult;
|
||||||
|
|
||||||
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
||||||
@ -79,6 +80,7 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize,
|
|||||||
pCompressor->min_match_size = MIN_MATCH_SIZE;
|
pCompressor->min_match_size = MIN_MATCH_SIZE;
|
||||||
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
|
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
|
||||||
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
|
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
|
||||||
|
pCompressor->flags = nFlags;
|
||||||
pCompressor->num_commands = 0;
|
pCompressor->num_commands = 0;
|
||||||
|
|
||||||
if (!nResult) {
|
if (!nResult) {
|
||||||
@ -503,6 +505,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
|||||||
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
||||||
int nLastLiteralsOffset;
|
int nLastLiteralsOffset;
|
||||||
int nMinMatchSize = pCompressor->min_match_size;
|
int nMinMatchSize = pCompressor->min_match_size;
|
||||||
|
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
|
||||||
int i;
|
int i;
|
||||||
|
|
||||||
cost[nEndOffset - 1] = 8;
|
cost[nEndOffset - 1] = 8;
|
||||||
@ -541,7 +544,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
|||||||
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
if (pCompressor->match[(i + nMatchLen) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||||
nCurCost += MODESWITCH_PENALTY;
|
nCurCost += MODESWITCH_PENALTY;
|
||||||
|
|
||||||
if (nBestCost >= nCurCost) {
|
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||||
nBestCost = nCurCost;
|
nBestCost = nCurCost;
|
||||||
nBestMatchLen = nMatchLen;
|
nBestMatchLen = nMatchLen;
|
||||||
nBestMatchOffset = pMatch[m].offset;
|
nBestMatchOffset = pMatch[m].offset;
|
||||||
@ -566,7 +569,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
|||||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||||
nCurCost += MODESWITCH_PENALTY;
|
nCurCost += MODESWITCH_PENALTY;
|
||||||
|
|
||||||
if (nBestCost >= nCurCost) {
|
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||||
nBestCost = nCurCost;
|
nBestCost = nCurCost;
|
||||||
nBestMatchLen = k;
|
nBestMatchLen = k;
|
||||||
nBestMatchOffset = pMatch[m].offset;
|
nBestMatchOffset = pMatch[m].offset;
|
||||||
@ -581,7 +584,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
|||||||
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
if (pCompressor->match[(i + k) << MATCHES_PER_OFFSET_SHIFT].length >= MIN_MATCH_SIZE)
|
||||||
nCurCost += MODESWITCH_PENALTY;
|
nCurCost += MODESWITCH_PENALTY;
|
||||||
|
|
||||||
if (nBestCost >= nCurCost) {
|
if (nBestCost > (nCurCost - nFavorRatio)) {
|
||||||
nBestCost = nCurCost;
|
nBestCost = nCurCost;
|
||||||
nBestMatchLen = k;
|
nBestMatchLen = k;
|
||||||
nBestMatchOffset = pMatch[m].offset;
|
nBestMatchOffset = pMatch[m].offset;
|
||||||
@ -621,7 +624,7 @@ static int lzsa_optimize_command_count(lsza_compressor *pCompressor, const int n
|
|||||||
int nMatchLen = pMatch->length;
|
int nMatchLen = pMatch->length;
|
||||||
int nReduce = 0;
|
int nReduce = 0;
|
||||||
|
|
||||||
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducible command size: <token> <FF> <ll> <ll> <offset> <offset> <FF> <mm> <mm> */ {
|
if (nMatchLen <= 9 && (i + nMatchLen) < nEndOffset) /* max reducible command size: <token> <EE> <ll> <ll> <offset> <offset> <EE> <mm> <mm> */ {
|
||||||
int nMatchOffset = pMatch->offset;
|
int nMatchOffset = pMatch->offset;
|
||||||
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
|
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE;
|
||||||
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
|
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size(nNumLiterals) + ((nMatchOffset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size(nEncodedMatchLen);
|
||||||
@ -756,7 +759,10 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
|||||||
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
|
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
|
||||||
return -1;
|
return -1;
|
||||||
|
|
||||||
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
|
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
|
||||||
|
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
|
||||||
|
else
|
||||||
|
pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x00;
|
||||||
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
|
nOutOffset = lzsa_write_literals_varlen(pOutData, nOutOffset, nNumLiterals);
|
||||||
|
|
||||||
if (nNumLiterals != 0) {
|
if (nNumLiterals != 0) {
|
||||||
@ -768,6 +774,18 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
|||||||
pCompressor->num_commands++;
|
pCompressor->num_commands++;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK) {
|
||||||
|
/* Emit EOD marker for raw block */
|
||||||
|
|
||||||
|
if ((nOutOffset + 4) > nMaxOutDataSize)
|
||||||
|
return -1;
|
||||||
|
|
||||||
|
pOutData[nOutOffset++] = 0;
|
||||||
|
pOutData[nOutOffset++] = 238;
|
||||||
|
pOutData[nOutOffset++] = 0;
|
||||||
|
pOutData[nOutOffset++] = 0;
|
||||||
|
}
|
||||||
|
|
||||||
return nOutOffset;
|
return nOutOffset;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -25,6 +25,10 @@
|
|||||||
|
|
||||||
#include "divsufsort.h"
|
#include "divsufsort.h"
|
||||||
|
|
||||||
|
/* Compression flags */
|
||||||
|
#define LZSA_FLAG_FAVOR_RATIO (1<<0) /**< 1 to compress with the best ratio, 0 to trade some compression ratio for extra decompression speed */
|
||||||
|
#define LZSA_FLAG_RAW_BLOCK (1<<1) /**< 1 to emit raw block */
|
||||||
|
|
||||||
/* Forward declarations */
|
/* Forward declarations */
|
||||||
typedef struct _lzsa_match lzsa_match;
|
typedef struct _lzsa_match lzsa_match;
|
||||||
|
|
||||||
@ -36,6 +40,7 @@ typedef struct {
|
|||||||
unsigned int *open_intervals;
|
unsigned int *open_intervals;
|
||||||
lzsa_match *match;
|
lzsa_match *match;
|
||||||
int min_match_size;
|
int min_match_size;
|
||||||
|
int flags;
|
||||||
int num_commands;
|
int num_commands;
|
||||||
} lsza_compressor;
|
} lsza_compressor;
|
||||||
|
|
||||||
@ -45,10 +50,11 @@ typedef struct {
|
|||||||
* @param pCompressor compression context to initialize
|
* @param pCompressor compression context to initialize
|
||||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||||
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
||||||
|
* @param nFlags compression flags
|
||||||
*
|
*
|
||||||
* @return 0 for success, non-zero for failure
|
* @return 0 for success, non-zero for failure
|
||||||
*/
|
*/
|
||||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize);
|
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFlags);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Clean up compression context and free up any associated resources
|
* Clean up compression context and free up any associated resources
|
||||||
|
Loading…
x
Reference in New Issue
Block a user