mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2024-06-13 22:29:32 +00:00
Compare commits
3 Commits
8cea101625
...
583e4db62e
| Author | SHA1 | Date | |
|---|---|---|---|
| | 583e4db62e | | |
| | 398885a52d | | |
| | 21a0dc70c8 | | |
14
src/lzsa.c
14
src/lzsa.c
|
@ -527,7 +527,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, co
|
||||||
/* Try to compress it, expected to succeed */
|
/* Try to compress it, expected to succeed */
|
||||||
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
|
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
|
||||||
nFlags, nMinMatchSize, nFormatVersion);
|
nFlags, nMinMatchSize, nFormatVersion);
|
||||||
if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
|
if (nActualCompressedSize == (size_t)-1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
|
||||||
free(pTmpDecompressedData);
|
free(pTmpDecompressedData);
|
||||||
pTmpDecompressedData = NULL;
|
pTmpDecompressedData = NULL;
|
||||||
free(pTmpCompressedData);
|
free(pTmpCompressedData);
|
||||||
|
@ -545,7 +545,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, co
|
||||||
size_t nActualDecompressedSize;
|
size_t nActualDecompressedSize;
|
||||||
int nDecFormatVersion = nFormatVersion;
|
int nDecFormatVersion = nFormatVersion;
|
||||||
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion);
|
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion);
|
||||||
if (nActualDecompressedSize == -1) {
|
if (nActualDecompressedSize == (size_t)-1) {
|
||||||
free(pTmpDecompressedData);
|
free(pTmpDecompressedData);
|
||||||
pTmpDecompressedData = NULL;
|
pTmpDecompressedData = NULL;
|
||||||
free(pTmpCompressedData);
|
free(pTmpCompressedData);
|
||||||
|
@ -691,7 +691,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
|
||||||
long long t0 = do_get_time();
|
long long t0 = do_get_time();
|
||||||
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
|
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
|
||||||
long long t1 = do_get_time();
|
long long t1 = do_get_time();
|
||||||
if (nActualCompressedSize == -1) {
|
if (nActualCompressedSize == (size_t)-1) {
|
||||||
free(pCompressedData);
|
free(pCompressedData);
|
||||||
free(pFileData);
|
free(pFileData);
|
||||||
fprintf(stderr, "compression error\n");
|
fprintf(stderr, "compression error\n");
|
||||||
|
@ -699,7 +699,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
|
||||||
}
|
}
|
||||||
|
|
||||||
long long nCurDecTime = t1 - t0;
|
long long nCurDecTime = t1 - t0;
|
||||||
if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
|
if (nBestCompTime == (size_t)-1 || nBestCompTime > nCurDecTime)
|
||||||
nBestCompTime = nCurDecTime;
|
nBestCompTime = nCurDecTime;
|
||||||
|
|
||||||
/* Check guard bytes before the output buffer */
|
/* Check guard bytes before the output buffer */
|
||||||
|
@ -800,7 +800,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
|
||||||
nMaxDecompressedSize = 65536;
|
nMaxDecompressedSize = 65536;
|
||||||
else
|
else
|
||||||
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
|
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
|
||||||
if (nMaxDecompressedSize == -1) {
|
if (nMaxDecompressedSize == (size_t)-1) {
|
||||||
free(pFileData);
|
free(pFileData);
|
||||||
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
|
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
|
||||||
return 100;
|
return 100;
|
||||||
|
@ -822,7 +822,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
|
||||||
long long t0 = do_get_time();
|
long long t0 = do_get_time();
|
||||||
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion);
|
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion);
|
||||||
long long t1 = do_get_time();
|
long long t1 = do_get_time();
|
||||||
if (nActualDecompressedSize == -1) {
|
if (nActualDecompressedSize == (size_t)-1) {
|
||||||
free(pDecompressedData);
|
free(pDecompressedData);
|
||||||
free(pFileData);
|
free(pFileData);
|
||||||
fprintf(stderr, "decompression error\n");
|
fprintf(stderr, "decompression error\n");
|
||||||
|
@ -830,7 +830,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
|
||||||
}
|
}
|
||||||
|
|
||||||
long long nCurDecTime = t1 - t0;
|
long long nCurDecTime = t1 - t0;
|
||||||
if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
|
if (nBestDecTime == (size_t)-1 || nBestDecTime > nCurDecTime)
|
||||||
nBestDecTime = nCurDecTime;
|
nBestDecTime = nCurDecTime;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -259,7 +259,7 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
|
||||||
if ((matchptr - pMatches) < nMaxMatches) {
|
if ((matchptr - pMatches) < nMaxMatches) {
|
||||||
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
|
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
|
||||||
|
|
||||||
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
if (nMatchOffset <= MAX_OFFSET) {
|
||||||
matchptr->length = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
matchptr->length = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
||||||
matchptr->offset = (const unsigned short)nMatchOffset;
|
matchptr->offset = (const unsigned short)nMatchOffset;
|
||||||
matchptr++;
|
matchptr++;
|
||||||
|
@ -287,6 +287,8 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
|
||||||
}
|
}
|
||||||
matchptr->offset = (const unsigned short)nMatchOffset;
|
matchptr->offset = (const unsigned short)nMatchOffset;
|
||||||
matchptr++;
|
matchptr++;
|
||||||
|
|
||||||
|
nPrevOffset = nMatchOffset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -299,7 +301,7 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
|
||||||
if ((matchptr - pMatches) < nMaxMatches) {
|
if ((matchptr - pMatches) < nMaxMatches) {
|
||||||
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
|
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
|
||||||
|
|
||||||
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
if (nMatchOffset <= MAX_OFFSET) {
|
||||||
const unsigned short nMatchLen = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)));
|
const unsigned short nMatchLen = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)));
|
||||||
|
|
||||||
if (nMatchLen > 2) {
|
if (nMatchLen > 2) {
|
||||||
|
|
|
@ -193,7 +193,7 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
|
||||||
*/
|
*/
|
||||||
static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nDepth) {
|
static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nDepth) {
|
||||||
const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2);
|
const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2);
|
||||||
const int *rle_len = (int*)pCompressor->intervals /* reuse */;
|
const int *rle_len = (const int*)pCompressor->intervals /* reuse */;
|
||||||
lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
|
lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
|
||||||
int j;
|
int j;
|
||||||
|
|
||||||
|
@ -443,11 +443,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||||
nMinLen = nMaxRepLenForPos;
|
nMinLen = nMaxRepLenForPos;
|
||||||
pInWindowAtPos = pInWindowStart + nMinLen;
|
pInWindowAtPos = pInWindowStart + nMinLen;
|
||||||
|
|
||||||
while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 8))
|
while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos, pInWindowAtPos - nRepOffset, 8))
|
||||||
pInWindowAtPos += 8;
|
pInWindowAtPos += 8;
|
||||||
while ((pInWindowAtPos + 4) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 4))
|
while ((pInWindowAtPos + 4) < pInWindowMax && !memcmp(pInWindowAtPos, pInWindowAtPos - nRepOffset, 4))
|
||||||
pInWindowAtPos += 4;
|
pInWindowAtPos += 4;
|
||||||
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0])
|
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[0] == pInWindowAtPos[-nRepOffset])
|
||||||
pInWindowAtPos++;
|
pInWindowAtPos++;
|
||||||
const int nCurRepLen = (const int)(pInWindowAtPos - pInWindowStart);
|
const int nCurRepLen = (const int)(pInWindowAtPos - pInWindowStart);
|
||||||
|
|
||||||
|
@ -470,8 +470,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||||
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
|
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
|
||||||
int nMatchLen = match[m].length & 0x7fff;
|
int nMatchLen = match[m].length & 0x7fff;
|
||||||
const int nMatchOffset = match[m].offset;
|
const int nMatchOffset = match[m].offset;
|
||||||
int nNoRepmatchOffsetCost;
|
int nNoRepmatchOffsetCost = 0, nNoRepmatchScore = 0;
|
||||||
int nNoRepmatchScore;
|
|
||||||
int nStartingMatchLen, k;
|
int nStartingMatchLen, k;
|
||||||
|
|
||||||
if ((i + nMatchLen) > nEndOffset)
|
if ((i + nMatchLen) > nEndOffset)
|
||||||
|
@ -690,7 +689,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || match[m + 1].length < LCP_MAX))
|
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || (match[m + 1].length & 0x7fff) < LCP_MAX))
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -779,11 +778,13 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
|
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset) {
|
||||||
/* Otherwise, try to gain a match forward as well */
|
/* Otherwise, try to gain a match forward as well */
|
||||||
if (i >= pBestMatch[nNextIndex].offset && (i + pMatch->length) <= nEndOffset) {
|
if (i >= pBestMatch[nNextIndex].offset && (i + pMatch->length) <= nEndOffset) {
|
||||||
int nMaxLen = 0;
|
int nMaxLen = 0;
|
||||||
const unsigned char *pInWindowAtPos = pInWindow + i;
|
const unsigned char *pInWindowAtPos = pInWindow + i;
|
||||||
|
while ((nMaxLen + 8) < pMatch->length && !memcmp(pInWindowAtPos + nMaxLen - pBestMatch[nNextIndex].offset, pInWindowAtPos + nMaxLen, 8))
|
||||||
|
nMaxLen += 8;
|
||||||
while ((nMaxLen + 4) < pMatch->length && !memcmp(pInWindowAtPos + nMaxLen - pBestMatch[nNextIndex].offset, pInWindowAtPos + nMaxLen, 4))
|
while ((nMaxLen + 4) < pMatch->length && !memcmp(pInWindowAtPos + nMaxLen - pBestMatch[nNextIndex].offset, pInWindowAtPos + nMaxLen, 4))
|
||||||
nMaxLen += 4;
|
nMaxLen += 4;
|
||||||
while (nMaxLen < pMatch->length && pInWindowAtPos[nMaxLen - pBestMatch[nNextIndex].offset] == pInWindowAtPos[nMaxLen])
|
while (nMaxLen < pMatch->length && pInWindowAtPos[nMaxLen - pBestMatch[nNextIndex].offset] == pInWindowAtPos[nMaxLen])
|
||||||
|
@ -799,9 +800,12 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
|
||||||
nPartialSizeBefore = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
|
nPartialSizeBefore = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
|
||||||
nPartialSizeBefore += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
|
nPartialSizeBefore += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
|
||||||
nPartialSizeBefore += lzsa_get_literals_varlen_size_v2(nNextLiterals);
|
nPartialSizeBefore += lzsa_get_literals_varlen_size_v2(nNextLiterals);
|
||||||
|
nPartialSizeBefore += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
|
||||||
|
|
||||||
nPartialSizeAfter = lzsa_get_match_varlen_size_v2(nMaxLen - MIN_MATCH_SIZE_V2);
|
nPartialSizeAfter = lzsa_get_match_varlen_size_v2(nMaxLen - MIN_MATCH_SIZE_V2);
|
||||||
nPartialSizeAfter += lzsa_get_literals_varlen_size_v2(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
|
nPartialSizeAfter += lzsa_get_literals_varlen_size_v2(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
|
||||||
|
if (nRepMatchOffset != pBestMatch[nNextIndex].offset)
|
||||||
|
nPartialSizeAfter += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
|
||||||
|
|
||||||
if (nPartialSizeAfter < nPartialSizeBefore) {
|
if (nPartialSizeAfter < nPartialSizeBefore) {
|
||||||
const int nMatchLen = pMatch->length;
|
const int nMatchLen = pMatch->length;
|
||||||
|
@ -1236,7 +1240,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
|
||||||
/* Compress optimally without breaking ties in favor of less tokens */
|
/* Compress optimally without breaking ties in favor of less tokens */
|
||||||
|
|
||||||
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
|
||||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nArrivalsPerPosition);
|
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nArrivalsPerPosition);
|
||||||
|
|
||||||
if (nInDataSize < 65536) {
|
if (nInDataSize < 65536) {
|
||||||
int* first_offset_for_byte = pCompressor->first_offset_for_byte;
|
int* first_offset_for_byte = pCompressor->first_offset_for_byte;
|
||||||
|
@ -1452,20 +1456,20 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Compress optimally and do break ties in favor of less tokens */
|
/* Compress optimally and do break ties in favor of less tokens */
|
||||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT_V2);
|
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 1 /* reduce */, 0 /* use forward reps */, NARRIVALS_PER_POSITION_V2_MAX);
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try to reduce final command set, wherever possible */
|
/* Try to reduce final command set, wherever possible */
|
||||||
nPasses = 0;
|
nPasses = 0;
|
||||||
do {
|
do {
|
||||||
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
|
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset);
|
||||||
nPasses++;
|
nPasses++;
|
||||||
} while (nDidReduce && nPasses < 20);
|
} while (nDidReduce && nPasses < 20);
|
||||||
|
|
||||||
/* Write compressed block */
|
/* Write compressed block */
|
||||||
nResult = lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
|
nResult = lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
|
||||||
if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
|
if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
|
||||||
nResult = lzsa_write_raw_uncompressed_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
|
nResult = lzsa_write_raw_uncompressed_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
return nResult;
|
return nResult;
|
||||||
|
|
Loading…
Reference in New Issue
Block a user