mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2024-12-23 22:29:27 +00:00
Add --prefer-ratio, --prefer-speed, -m options
This commit is contained in:
parent
f837ed096e
commit
331d6f9911
61
src/main.c
61
src/main.c
@ -58,7 +58,7 @@ static long long lzsa_get_time() {
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
|
||||
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions, const int nMinMatchSize) {
|
||||
FILE *f_in, *f_out;
|
||||
unsigned char *pInData, *pOutData;
|
||||
lsza_compressor compressor;
|
||||
@ -108,7 +108,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
}
|
||||
memset(pOutData, 0, BLOCK_SIZE);
|
||||
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2);
|
||||
nResult = lzsa_compressor_init(&compressor, BLOCK_SIZE * 2, nMinMatchSize);
|
||||
if (nResult != 0) {
|
||||
free(pOutData);
|
||||
pOutData = NULL;
|
||||
@ -251,8 +251,8 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
|
||||
double fDelta = ((double)(nEndTime - nStartTime)) / 1000000.0;
|
||||
double fSpeed = ((double)nOriginalSize / 1048576.0) / fDelta;
|
||||
int nCommands = lzsa_compressor_get_command_count(&compressor);
|
||||
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%lld bytes/token), %lld into %lld bytes ==> %g %%\n",
|
||||
pszInFilename, fDelta, fSpeed, nCommands, nOriginalSize / ((long long)nCommands),
|
||||
fprintf(stdout, "\rCompressed '%s' in %g seconds, %.02g Mb/s, %d tokens (%g bytes/token), %lld into %lld bytes ==> %g %%\n",
|
||||
pszInFilename, fDelta, fSpeed, nCommands, (double)nOriginalSize / (double)nCommands,
|
||||
nOriginalSize, nCompressedSize, (double)(nCompressedSize * 100.0 / nOriginalSize));
|
||||
fflush(stdout);
|
||||
}
|
||||
@ -695,7 +695,9 @@ int main(int argc, char **argv) {
|
||||
bool bArgsError = false;
|
||||
bool bCommandDefined = false;
|
||||
bool bVerifyCompression = false;
|
||||
bool bMinMatchDefined = false;
|
||||
char cCommand = 'z';
|
||||
int nMinMatchSize = MIN_MATCH_SIZE;
|
||||
unsigned int nOptions = 0;
|
||||
|
||||
for (i = 1; i < argc; i++) {
|
||||
@ -722,6 +724,52 @@ int main(int argc, char **argv) {
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-m")) {
|
||||
if (!bMinMatchDefined && (i + 1) < argc) {
|
||||
char *pEnd = NULL;
|
||||
nMinMatchSize = (int)strtol(argv[i + 1], &pEnd, 10);
|
||||
/* Accept the value only if digits were parsed and it lies in [MIN_MATCH_SIZE, MATCH_RUN_LEN): both bounds must hold. */
if (pEnd && pEnd != argv[i + 1] && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||
i++;
|
||||
bMinMatchDefined = true;
|
||||
}
|
||||
else {
|
||||
bArgsError = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strncmp(argv[i], "-m", 2)) {
|
||||
if (!bMinMatchDefined) {
|
||||
char *pEnd = NULL;
|
||||
nMinMatchSize = (int)strtol(argv[i] + 2, &pEnd, 10);
|
||||
/* Accept the attached value only if digits were parsed and it lies in [MIN_MATCH_SIZE, MATCH_RUN_LEN): both bounds must hold. */
if (pEnd && pEnd != (argv[i]+2) && (nMinMatchSize >= MIN_MATCH_SIZE && nMinMatchSize < MATCH_RUN_LEN)) {
|
||||
/* The value is part of this same argument ("-m<value>"), so the next argument is not consumed here. */
|
||||
bMinMatchDefined = true;
|
||||
}
|
||||
else {
|
||||
bArgsError = true;
|
||||
}
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "--prefer-ratio")) {
|
||||
if (!bMinMatchDefined) {
|
||||
nMinMatchSize = MIN_MATCH_SIZE;
|
||||
bMinMatchDefined = true;
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "--prefer-speed")) {
|
||||
if (!bMinMatchDefined) {
|
||||
nMinMatchSize = 4;
|
||||
bMinMatchDefined = true;
|
||||
}
|
||||
else
|
||||
bArgsError = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-v")) {
|
||||
if ((nOptions & OPT_VERBOSE) == 0) {
|
||||
nOptions |= OPT_VERBOSE;
|
||||
@ -754,11 +802,14 @@ int main(int argc, char **argv) {
|
||||
fprintf(stderr, " -d: decompress (default: compress)\n");
|
||||
fprintf(stderr, " -v: be verbose\n");
|
||||
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
|
||||
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
|
||||
fprintf(stderr, " --prefer-ratio: favor compression ratio (default, same as -m 3)\n");
|
||||
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m 4)\n");
|
||||
return 100;
|
||||
}
|
||||
|
||||
if (cCommand == 'z') {
|
||||
int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions);
|
||||
int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions, nMinMatchSize);
|
||||
if (nResult == 0 && bVerifyCompression) {
|
||||
nResult = lzsa_compare(pszOutFilename, pszInFilename, nOptions);
|
||||
}
|
||||
|
16
src/shrink.c
16
src/shrink.c
@ -60,10 +60,11 @@ typedef struct _lzsa_match {
|
||||
*
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (clamped to the range MIN_MATCH_SIZE..MATCH_RUN_LEN-1)
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) {
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize) {
|
||||
int nResult;
|
||||
|
||||
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
||||
@ -71,6 +72,11 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize)
|
||||
pCompressor->pos_data = NULL;
|
||||
pCompressor->open_intervals = NULL;
|
||||
pCompressor->match = NULL;
|
||||
pCompressor->min_match_size = nMinMatchSize;
|
||||
if (pCompressor->min_match_size < MIN_MATCH_SIZE)
|
||||
pCompressor->min_match_size = MIN_MATCH_SIZE;
|
||||
else if (pCompressor->min_match_size > (MATCH_RUN_LEN - 1))
|
||||
pCompressor->min_match_size = MATCH_RUN_LEN - 1;
|
||||
pCompressor->num_commands = 0;
|
||||
|
||||
if (!nResult) {
|
||||
@ -167,10 +173,11 @@ static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned
|
||||
* saves us from having to build the inverse suffix array index, as the LCP is calculated without it using this method,
|
||||
* and the interval builder below doesn't need it either. */
|
||||
intervals[0] &= POS_MASK;
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
for (i = 1; i < nInWindowSize - 1; i++) {
|
||||
int nIndex = (int)(intervals[i] & POS_MASK);
|
||||
int nLen = PLCP[nIndex];
|
||||
if (nLen < MIN_MATCH_SIZE)
|
||||
if (nLen < nMinMatchSize)
|
||||
nLen = 0;
|
||||
if (nLen > LCP_MAX)
|
||||
nLen = LCP_MAX;
|
||||
@ -493,6 +500,7 @@ static inline int lzsa_write_match_varlen(unsigned char *pOutData, int nOutOffse
|
||||
static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStartOffset, const int nEndOffset) {
|
||||
int *cost = (int*)pCompressor->pos_data; /* Reuse */
|
||||
int nLastLiteralsOffset;
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
int i;
|
||||
|
||||
cost[nEndOffset - 1] = 1;
|
||||
@ -514,7 +522,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
lzsa_match *pMatch = pCompressor->match + (i << MATCHES_PER_OFFSET_SHIFT);
|
||||
int m;
|
||||
|
||||
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= MIN_MATCH_SIZE; m++) {
|
||||
for (m = 0; m < NMATCHES_PER_OFFSET && pMatch[m].length >= nMinMatchSize; m++) {
|
||||
int nMatchOffsetSize = (pMatch[m].offset <= 256) ? 1 : 2;
|
||||
|
||||
if (pMatch[m].length >= LEAVE_ALONE_MATCH_SIZE) {
|
||||
@ -544,7 +552,7 @@ static void lzsa_optimize_matches(lsza_compressor *pCompressor, const int nStart
|
||||
if (nMatchRunLen > MATCH_RUN_LEN)
|
||||
nMatchRunLen = MATCH_RUN_LEN;
|
||||
|
||||
for (k = MIN_MATCH_SIZE; k < nMatchRunLen; k++) {
|
||||
for (k = nMinMatchSize; k < nMatchRunLen; k++) {
|
||||
int nCurCost;
|
||||
|
||||
nCurCost = 1 + nMatchOffsetSize /* no extra match len bytes */;
|
||||
|
@ -35,6 +35,7 @@ typedef struct {
|
||||
unsigned int *pos_data;
|
||||
unsigned int *open_intervals;
|
||||
lzsa_match *match;
|
||||
int min_match_size;
|
||||
int num_commands;
|
||||
} lsza_compressor;
|
||||
|
||||
@ -43,10 +44,11 @@ typedef struct {
|
||||
*
|
||||
* @param pCompressor compression context to initialize
|
||||
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
||||
* @param nMinMatchSize minimum match size (clamped to the range MIN_MATCH_SIZE..MATCH_RUN_LEN-1)
|
||||
*
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize);
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize);
|
||||
|
||||
/**
|
||||
* Clean up compression context and free up any associated resources
|
||||
|
Loading…
Reference in New Issue
Block a user