Compare commits

...

8 Commits

Author SHA1 Message Date
Emmanuel Marty
15ee2dfe11
Bump version 2023-02-27 18:23:00 +01:00
Emmanuel Marty
35ec6d73da
Optimal LZSA1 compression 2023-02-27 08:26:42 +01:00
Emmanuel Marty
6b08bc3990
Update README 2023-02-13 10:37:25 +01:00
Emmanuel Marty
9350d977bf
Add consts 2023-02-10 17:08:03 +01:00
Emmanuel Marty
82f03b55e3
Faster LZSA1 compression 2023-02-02 11:11:14 +01:00
Emmanuel Marty
583e4db62e
Small improvements 2023-01-30 13:21:43 +01:00
Emmanuel Marty
398885a52d
Small simplifications in matchfinder 2023-01-30 13:19:03 +01:00
Emmanuel Marty
21a0dc70c8
Fix CppCheck warnings 2023-01-30 13:17:30 +01:00
8 changed files with 79 additions and 68 deletions

View File

@ -21,6 +21,8 @@ The [desolate](https://github.com/nzeemin/spectrum-desolate) game port to the ZX
The [Lowtech demo](https://github.com/wiz21b/lowtech) for the Apple II+ and IIe, by Wiz/Imphobia, compresses data with LZSA.
The [Druid & Droid](https://leosoft.itch.io/druid-and-droid) game for the Amstrad CPC also uses LZSA for compression.
The LZSA compression tool uses an aggressive optimal packing strategy to try to find the sequence of commands that gives the smallest packed file that decompresses to the original while maintaining the maximum possible decompression speed.
The compression formats give the user choices that range from decompressing faster than LZ4 on 8-bit systems with better compression, to compressing as well as ZX7 with much better decompression speed. LZSA1 is designed to replace LZ4 and LZSA2 to replace ZX7, in 8-bit scenarios.

View File

@ -64,7 +64,7 @@ int lzsa_get_frame_size(void) {
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion) {
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion) {
if (nMaxFrameDataSize >= 3 && (nFormatVersion == 1 || nFormatVersion == 2)) {
pFrameData[0] = LZSA_ID_0; /* Magic number */
pFrameData[1] = LZSA_ID_1;

View File

@ -60,7 +60,7 @@ int lzsa_get_frame_size(void);
*
* @return number of encoded bytes, or -1 for failure
*/
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, int nFormatVersion);
int lzsa_encode_header(unsigned char *pFrameData, const int nMaxFrameDataSize, const int nFormatVersion);
/**
* Encode compressed block frame header

View File

@ -60,11 +60,11 @@ extern "C" {
* @param nBufferSize size of buffer in bytes
*/
static inline void lzsa_reverse_buffer(unsigned char *pBuffer, const int nBufferSize) {
int nMidPoint = nBufferSize / 2;
const int nMidPoint = nBufferSize / 2;
int i, j;
for (i = 0, j = nBufferSize - 1; i < nMidPoint; i++, j--) {
unsigned char c = pBuffer[i];
const unsigned char c = pBuffer[i];
pBuffer[i] = pBuffer[j];
pBuffer[j] = c;
}

View File

@ -47,7 +47,7 @@
#define OPT_RAW_BACKWARD 8
#define OPT_STATS 16
#define TOOL_VERSION "1.4.0"
#define TOOL_VERSION "1.4.1"
/*---------------------------------------------------------------------------*/
@ -527,7 +527,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, co
/* Try to compress it, expected to succeed */
size_t nActualCompressedSize = lzsa_compress_inmem(pGeneratedData, pCompressedData, nGeneratedDataSize, lzsa_get_max_compressed_size_inmem(nGeneratedDataSize),
nFlags, nMinMatchSize, nFormatVersion);
if (nActualCompressedSize == -1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
if (nActualCompressedSize == (size_t)-1 || (int)nActualCompressedSize < (lzsa_get_header_size() + lzsa_get_frame_size() + lzsa_get_frame_size() /* footer */)) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -545,7 +545,7 @@ static int do_self_test(const unsigned int nOptions, const int nMinMatchSize, co
size_t nActualDecompressedSize;
int nDecFormatVersion = nFormatVersion;
nActualDecompressedSize = lzsa_decompress_inmem(pCompressedData, pTmpDecompressedData, nActualCompressedSize, nGeneratedDataSize, nFlags, &nDecFormatVersion);
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pTmpDecompressedData);
pTmpDecompressedData = NULL;
free(pTmpCompressedData);
@ -691,7 +691,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
long long t0 = do_get_time();
nActualCompressedSize = lzsa_compress_inmem(pFileData, pCompressedData + 1024, nFileSize, nRightGuardPos, nFlags, nMinMatchSize, nFormatVersion);
long long t1 = do_get_time();
if (nActualCompressedSize == -1) {
if (nActualCompressedSize == (size_t)-1) {
free(pCompressedData);
free(pFileData);
fprintf(stderr, "compression error\n");
@ -699,7 +699,7 @@ static int do_compr_benchmark(const char *pszInFilename, const char *pszOutFilen
}
long long nCurDecTime = t1 - t0;
if (nBestCompTime == -1 || nBestCompTime > nCurDecTime)
if (nBestCompTime == (size_t)-1 || nBestCompTime > nCurDecTime)
nBestCompTime = nCurDecTime;
/* Check guard bytes before the output buffer */
@ -800,7 +800,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
nMaxDecompressedSize = 65536;
else
nMaxDecompressedSize = lzsa_get_max_decompressed_size_inmem(pFileData, nFileSize);
if (nMaxDecompressedSize == -1) {
if (nMaxDecompressedSize == (size_t)-1) {
free(pFileData);
fprintf(stderr, "invalid compressed format for file '%s'\n", pszInFilename);
return 100;
@ -822,7 +822,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
long long t0 = do_get_time();
nActualDecompressedSize = lzsa_decompress_inmem(pFileData, pDecompressedData, nFileSize, nMaxDecompressedSize, nFlags, &nFormatVersion);
long long t1 = do_get_time();
if (nActualDecompressedSize == -1) {
if (nActualDecompressedSize == (size_t)-1) {
free(pDecompressedData);
free(pFileData);
fprintf(stderr, "decompression error\n");
@ -830,7 +830,7 @@ static int do_dec_benchmark(const char *pszInFilename, const char *pszOutFilenam
}
long long nCurDecTime = t1 - t0;
if (nBestDecTime == -1 || nBestDecTime > nCurDecTime)
if (nBestDecTime == (size_t)-1 || nBestDecTime > nCurDecTime)
nBestDecTime = nCurDecTime;
}

View File

@ -203,6 +203,7 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
unsigned int match_pos;
lzsa_match *matchptr;
unsigned int nPrevOffset = 0;
unsigned char nV1OffsetFound[2] = { 0, 0 };
/**
* Find matches using intervals
@ -259,7 +260,7 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
if ((matchptr - pMatches) < nMaxMatches) {
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (nMatchOffset <= MAX_OFFSET) {
matchptr->length = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
@ -281,12 +282,25 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) {
matchptr->length = (const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
else {
matchptr->length = (const unsigned short)(ref >> LCP_SHIFT);
unsigned int nV1OffsetType = (nMatchOffset <= 256) ? 0 : 1;
if (!nV1OffsetFound[nV1OffsetType]) {
matchptr->length = (const unsigned short)(ref >> LCP_SHIFT);
matchptr->offset = (const unsigned short)nMatchOffset;
if (matchptr->length < 256)
nV1OffsetFound[nV1OffsetType] = 1;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
matchptr->offset = (const unsigned short)nMatchOffset;
matchptr++;
}
}
@ -299,7 +313,7 @@ static int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset,
if ((matchptr - pMatches) < nMaxMatches) {
const unsigned int nMatchOffset = (const unsigned int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (nMatchOffset <= MAX_OFFSET) {
const unsigned short nMatchLen = ((const unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)));
if (nMatchLen > 2) {

View File

@ -219,23 +219,23 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nSt
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
const int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if (nNumArrivalsForThisPos != 0) {
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if ((i + nMatchLen) > nEndOffset)
nMatchLen = nEndOffset - i;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
nStartingMatchLen = nMatchLen;
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
if (nNumArrivalsForThisPos != 0) {
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT_V1];
lzsa_arrival* pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V1];
int nCodingChoiceCost = cur_arrival[0].cost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0, n;
@ -253,31 +253,22 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, const int nSt
if (!exists) {
const int nScore = cur_arrival[0].score + 5;
int nNonRepMatchIdx = -1;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
if (nCodingChoiceCost < pDestSlots[n].cost ||
(nCodingChoiceCost == pDestSlots[n].cost && nScore < (pDestSlots[n].score + nDisableScore))) {
nNonRepMatchIdx = n;
break;
}
if (nCodingChoiceCost < pDestSlots[0].cost ||
(nCodingChoiceCost == pDestSlots[0].cost && nScore < (pDestSlots[0].score + nDisableScore))) {
memmove(&pDestSlots[1],
&pDestSlots[0],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - 1));
pDestSlots->cost = nCodingChoiceCost;
pDestSlots->rep_offset = match[m].offset;
pDestSlots->from_slot = 1;
pDestSlots->from_pos = i - nStartOffset;
pDestSlots->match_len = k;
pDestSlots->num_literals = 0;
pDestSlots->score = nScore;
}
if (nNonRepMatchIdx >= 0) {
memmove(&pDestSlots[nNonRepMatchIdx + 1],
&pDestSlots[nNonRepMatchIdx],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - nNonRepMatchIdx - 1));
lzsa_arrival* pDestArrival = &pDestSlots[nNonRepMatchIdx];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->rep_offset = match[m].offset;
pDestArrival->from_slot = 1;
pDestArrival->from_pos = i - nStartOffset;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
}
}
}
}
}
}
@ -375,7 +366,7 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
pBestMatch[i + pMatch->length].length)) {
int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v1(0) + */ ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);

View File

@ -193,7 +193,7 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
*/
static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nDepth) {
const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2);
const int *rle_len = (int*)pCompressor->intervals /* reuse */;
const int *rle_len = (const int*)pCompressor->intervals /* reuse */;
lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
int j;
@ -443,11 +443,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
nMinLen = nMaxRepLenForPos;
pInWindowAtPos = pInWindowStart + nMinLen;
while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 8))
while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos, pInWindowAtPos - nRepOffset, 8))
pInWindowAtPos += 8;
while ((pInWindowAtPos + 4) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 4))
while ((pInWindowAtPos + 4) < pInWindowMax && !memcmp(pInWindowAtPos, pInWindowAtPos - nRepOffset, 4))
pInWindowAtPos += 4;
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0])
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[0] == pInWindowAtPos[-nRepOffset])
pInWindowAtPos++;
const int nCurRepLen = (const int)(pInWindowAtPos - pInWindowStart);
@ -470,8 +470,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
int nMatchLen = match[m].length & 0x7fff;
const int nMatchOffset = match[m].offset;
int nNoRepmatchOffsetCost;
int nNoRepmatchScore;
int nNoRepmatchOffsetCost = 0, nNoRepmatchScore = 0;
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
@ -690,7 +689,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
}
}
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || match[m + 1].length < LCP_MAX))
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || (match[m + 1].length & 0x7fff) < LCP_MAX))
break;
}
}
@ -779,11 +778,13 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
}
}
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset && nRepMatchOffset != pBestMatch[nNextIndex].offset) {
if (pBestMatch[nNextIndex].offset && pMatch->offset != pBestMatch[nNextIndex].offset) {
/* Otherwise, try to gain a match forward as well */
if (i >= pBestMatch[nNextIndex].offset && (i + pMatch->length) <= nEndOffset) {
int nMaxLen = 0;
const unsigned char *pInWindowAtPos = pInWindow + i;
while ((nMaxLen + 8) < pMatch->length && !memcmp(pInWindowAtPos + nMaxLen - pBestMatch[nNextIndex].offset, pInWindowAtPos + nMaxLen, 8))
nMaxLen += 8;
while ((nMaxLen + 4) < pMatch->length && !memcmp(pInWindowAtPos + nMaxLen - pBestMatch[nNextIndex].offset, pInWindowAtPos + nMaxLen, 4))
nMaxLen += 4;
while (nMaxLen < pMatch->length && pInWindowAtPos[nMaxLen - pBestMatch[nNextIndex].offset] == pInWindowAtPos[nMaxLen])
@ -799,9 +800,12 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
nPartialSizeBefore = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
nPartialSizeBefore += (pMatch->offset <= 32) ? 4 : ((pMatch->offset <= 512) ? 8 : ((pMatch->offset <= (8192 + 512)) ? 12 : 16));
nPartialSizeBefore += lzsa_get_literals_varlen_size_v2(nNextLiterals);
nPartialSizeBefore += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
nPartialSizeAfter = lzsa_get_match_varlen_size_v2(nMaxLen - MIN_MATCH_SIZE_V2);
nPartialSizeAfter += lzsa_get_literals_varlen_size_v2(nNextLiterals + (pMatch->length - nMaxLen)) + ((pMatch->length - nMaxLen) << 3);
if (nRepMatchOffset != pBestMatch[nNextIndex].offset)
nPartialSizeAfter += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
if (nPartialSizeAfter < nPartialSizeBefore) {
const int nMatchLen = pMatch->length;
@ -1236,7 +1240,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
/* Compress optimally without breaking ties in favor of fewer tokens */
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nArrivalsPerPosition);
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nArrivalsPerPosition);
if (nInDataSize < 65536) {
int* first_offset_for_byte = pCompressor->first_offset_for_byte;
@ -1452,20 +1456,20 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
}
/* Compress optimally and do break ties in favor of fewer tokens */
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT_V2);
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, 1 /* reduce */, 0 /* use forward reps */, NARRIVALS_PER_POSITION_V2_MAX);
}
/* Try to reduce final command set, wherever possible */
nPasses = 0;
do {
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize);
nDidReduce = lzsa_optimize_command_count_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset);
nPasses++;
} while (nDidReduce && nPasses < 20);
/* Write compressed block */
nResult = lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
nResult = lzsa_write_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
nResult = lzsa_write_raw_uncompressed_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, pOutData, nMaxOutDataSize);
nResult = lzsa_write_raw_uncompressed_block_v2(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
}
return nResult;