Implement dictionary support

This commit is contained in:
emmanuel-marty 2019-05-02 18:38:57 +02:00
parent ef6b43a296
commit bab5225e4c

View File

@ -59,7 +59,7 @@ static long long lzsa_get_time() {
/*---------------------------------------------------------------------------*/
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions, const int nMinMatchSize) {
static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions, const int nMinMatchSize) {
FILE *f_in, *f_out;
unsigned char *pInData, *pOutData;
lsza_compressor compressor;
@ -110,6 +110,35 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
}
memset(pOutData, 0, BLOCK_SIZE);
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *f_dictionary = fopen(pszDictionaryFilename, "rb");
if (!f_dictionary) {
free(pOutData);
pOutData = NULL;
free(pInData);
pInData = NULL;
fclose(f_out);
f_out = NULL;
fclose(f_in);
f_in = NULL;
fprintf(stderr, "error opening dictionary '%s' for reading\n", pszInFilename);
return 100;
}
nDictionaryDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, f_dictionary);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(f_dictionary);
f_dictionary = NULL;
}
nFlags = 0;
if (nOptions & OPT_FAVOR_RATIO)
nFlags |= LZSA_FLAG_FAVOR_RATIO;
@ -150,20 +179,24 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
int nPreviousBlockSize = 0;
if (nDictionaryDataSize)
nPreviousBlockSize = nDictionaryDataSize;
while (!feof(f_in) && !bError) {
int nInDataSize;
if (nPreviousBlockSize) {
memcpy(pInData, pInData + BLOCK_SIZE, nPreviousBlockSize);
memcpy(pInData + BLOCK_SIZE - nPreviousBlockSize, pInData + BLOCK_SIZE, nPreviousBlockSize);
}
nInDataSize = (int)fread(pInData + BLOCK_SIZE, 1, BLOCK_SIZE, f_in);
if (nInDataSize > 0) {
if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0) {
if (nPreviousBlockSize && (nOptions & OPT_RAW) != 0 && !nDictionaryDataSize) {
fprintf(stderr, "error: raw blocks can only be used with files <= 64 Kb\n");
bError = true;
break;
}
nDictionaryDataSize = 0;
int nOutDataSize;
@ -285,7 +318,7 @@ static int lzsa_compress(const char *pszInFilename, const char *pszOutFilename,
/*---------------------------------------------------------------------------*/
static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
unsigned int nFileSize = 0;
@ -366,6 +399,34 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
return 100;
}
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
if (!pDictionaryFile) {
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening dictionary file\n");
return 100;
}
nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(pDictionaryFile);
pDictionaryFile = NULL;
}
if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time();
}
@ -373,6 +434,10 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
int nDecompressionError = 0;
int nPrevDecompressedSize = 0;
if (nDictionaryDataSize) {
nPrevDecompressedSize = nDictionaryDataSize;
}
while (!feof(pInFile) && !nDecompressionError) {
unsigned int nBlockSize = 0;
@ -469,7 +534,7 @@ static int lzsa_decompress(const char *pszInFilename, const char *pszOutFilename
}
}
static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const unsigned int nOptions) {
static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, const char *pszDictionaryFilename, const unsigned int nOptions) {
long long nStartTime = 0LL, nEndTime = 0LL;
long long nOriginalSize = 0LL;
long long nKnownGoodSize = 0LL;
@ -569,6 +634,37 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
return 100;
}
int nDictionaryDataSize = 0;
if (pszDictionaryFilename) {
FILE *pDictionaryFile = fopen(pszDictionaryFilename, "rb");
if (!pDictionaryFile) {
free(pCompareData);
pCompareData = NULL;
free(pOutData);
pOutData = NULL;
free(pInBlock);
pInBlock = NULL;
fclose(pOutFile);
pOutFile = NULL;
fclose(pInFile);
pInFile = NULL;
fprintf(stderr, "error opening dictionary file\n");
return 100;
}
nDictionaryDataSize = (int)fread(pOutData + BLOCK_SIZE, 1, BLOCK_SIZE - 1, pDictionaryFile);
if (nDictionaryDataSize < 0)
nDictionaryDataSize = 0;
fclose(pDictionaryFile);
pDictionaryFile = NULL;
}
if (nOptions & OPT_VERBOSE) {
nStartTime = lzsa_get_time();
}
@ -577,6 +673,10 @@ static int lzsa_compare(const char *pszInFilename, const char *pszOutFilename, c
bool bComparisonError = false;
int nPrevDecompressedSize = 0;
if (nDictionaryDataSize) {
nPrevDecompressedSize = nDictionaryDataSize;
}
while (!feof(pInFile) && !nDecompressionError && !bComparisonError) {
unsigned int nBlockSize = 0;
@ -695,6 +795,7 @@ int main(int argc, char **argv) {
int i;
const char *pszInFilename = NULL;
const char *pszOutFilename = NULL;
const char *pszDictionaryFilename = NULL;
bool bArgsError = false;
bool bCommandDefined = false;
bool bVerifyCompression = false;
@ -727,6 +828,21 @@ int main(int argc, char **argv) {
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-D")) {
if (!pszDictionaryFilename && (i + 1) < argc) {
pszDictionaryFilename = argv[i + 1];
i++;
}
else
bArgsError = true;
}
else if (!strncmp(argv[i], "-D", 2)) {
if (!pszDictionaryFilename) {
pszDictionaryFilename = argv[i] + 2;
}
else
bArgsError = true;
}
else if (!strcmp(argv[i], "-m")) {
if (!bMinMatchDefined && (i + 1) < argc) {
char *pEnd = NULL;
@ -807,6 +923,7 @@ int main(int argc, char **argv) {
fprintf(stderr, " -d: decompress (default: compress)\n");
fprintf(stderr, " -v: be verbose\n");
fprintf(stderr, " -r: raw block format (max. 64 Kb files)\n");
fprintf(stderr, " -D <filename>: use dictionary file\n");
fprintf(stderr, " -m <value>: minimum match size (3-14) (default: 3)\n");
fprintf(stderr, " --prefer-ratio: favor compression ratio (default)\n");
fprintf(stderr, " --prefer-speed: favor decompression speed (same as -m3)\n");
@ -814,13 +931,13 @@ int main(int argc, char **argv) {
}
if (cCommand == 'z') {
int nResult = lzsa_compress(pszInFilename, pszOutFilename, nOptions, nMinMatchSize);
int nResult = lzsa_compress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions, nMinMatchSize);
if (nResult == 0 && bVerifyCompression) {
nResult = lzsa_compare(pszOutFilename, pszInFilename, nOptions);
nResult = lzsa_compare(pszOutFilename, pszInFilename, pszDictionaryFilename, nOptions);
}
}
else if (cCommand == 'd') {
return lzsa_decompress(pszInFilename, pszOutFilename, nOptions);
return lzsa_decompress(pszInFilename, pszOutFilename, pszDictionaryFilename, nOptions);
}
else {
return 100;