Pack LZSA1 35% faster, and LZSA2 another 5% faster

This commit is contained in:
Emmanuel Marty 2022-04-02 08:49:26 +02:00 committed by GitHub
parent acfa11d733
commit 9e11c0893a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 254 additions and 234 deletions

View File

@ -145,9 +145,7 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf
*
* @return cost in bits
*/
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
return (nMatchOffset <= 256) ? 8 : 16;
}
#define lzsa_get_offset_cost_v1(__nMatchOffset) (((__nMatchOffset) <= 256) ? 8 : 16)
/**
* Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input
@ -162,60 +160,67 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
int i, j, n;
int i;
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT));
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) {
int j;
memset(arrival + i, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V2_MAX);
for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX; j++)
arrival[i + j].cost = 0x40000000;
}
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0;
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
int m;
lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT];
int j, m;
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
int nPrevCost = cur_arrival[j].cost;
const int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
const int nScore = cur_arrival[j].score + 1;
const int nNumLiterals = cur_arrival[j].num_literals + 1;
int n;
if (nNumLiterals == 1)
nCodingChoiceCost += nModeSwitchPenalty;
else if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) {
nCodingChoiceCost += 8;
}
lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT];
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
lzsa_arrival *pDestArrival = &pDestLiteralSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
if (nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1],
&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n],
memmove(&pDestLiteralSlots[n + 1],
&pDestLiteralSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i;
pDestArrival->match_len = 0;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore;
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
break;
}
}
}
const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1);
int nNumArrivalsForThisPos = j;
const int nNumArrivalsForThisPos = j;
for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) {
int nMatchLen = match[m].length;
int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset);
int nStartingMatchLen, k;
if ((i + nMatchLen) > nEndOffset)
@ -226,14 +231,13 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
else
nStartingMatchLen = nMinMatchSize;
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nPrevCost = cur_arrival[j].cost;
const int nPrevCost = cur_arrival[j].cost;
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost;
int exists = 0;
int exists = 0, n;
if (!cur_arrival[j].num_literals)
nCodingChoiceCost += nModeSwitchPenalty;
@ -248,36 +252,45 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
}
if (!exists) {
int nScore = cur_arrival[j].score + 5;
const int nScore = cur_arrival[j].score + 5;
int nNonRepMatchIdx = -1;
for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n];
lzsa_arrival* pDestArrival = &pDestSlots[n];
if (pDestArrival->from_slot == 0 ||
nCodingChoiceCost < pDestArrival->cost ||
if (nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = match[m].offset;
j = NARRIVALS_PER_POSITION_V1;
nNonRepMatchIdx = n;
break;
}
}
if (nNonRepMatchIdx >= 0) {
lzsa_arrival* pDestArrival = &pDestSlots[nNonRepMatchIdx];
memmove(&pDestSlots[nNonRepMatchIdx + 1],
&pDestSlots[nNonRepMatchIdx],
sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - nNonRepMatchIdx - 1));
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->rep_offset = match[m].offset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
break;
}
}
else {
break;
}
}
}
}
}
lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) {
if (end_arrival->from_pos >= nEndOffset) return;
@ -319,8 +332,8 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
const int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1);
const int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
@ -349,7 +362,7 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >=
(8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) {
/* Reduce */
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
@ -361,8 +374,8 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
}
}
if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset > 0 &&
if ((i + pMatch->length) < nEndOffset && pMatch->offset && pMatch->length >= MIN_MATCH_SIZE_V1 &&
pBestMatch[i + pMatch->length].offset &&
pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 &&
(pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN &&
(i + pMatch->length) >= pMatch->offset &&
@ -375,16 +388,16 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un
int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1);
nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1);
if (nCurPartialSize >= nReducedPartialSize) {
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].length = 0;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
nDidReduce = 1;
continue;
}
@ -421,11 +434,11 @@ static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
const int nMatchOffset = pMatch->offset;
const int nMatchLen = pMatch->length;
const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
nCompressedSize += nCommandSize;
nNumLiterals = 0;
@ -438,7 +451,7 @@ static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match
}
{
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
nCompressedSize += nCommandSize;
nNumLiterals = 0;
@ -474,13 +487,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
const lzsa_match *pMatch = pBestMatch + i;
if (pMatch->length >= MIN_MATCH_SIZE_V1) {
int nMatchOffset = pMatch->offset;
int nMatchLen = pMatch->length;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
const int nMatchOffset = pMatch->offset;
const int nMatchLen = pMatch->length;
const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen;
const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -558,8 +571,8 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
}
{
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3);
if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
return -1;
@ -621,7 +634,7 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa
*/
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int nNumLiterals = nEndOffset - nStartOffset;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
int nOutOffset = 0;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4;

View File

@ -197,66 +197,73 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
int j;
for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX && arrival[j].from_slot; j++) {
for (j = 0; j < NARRIVALS_PER_POSITION_V2_BIG && arrival[j].from_slot; j++) {
const int nRepOffset = arrival[j].rep_offset;
if (nMatchOffset != nRepOffset) {
const int nRepLen = arrival[j].rep_len;
const int nRepPos = arrival[j].rep_pos;
if (nRepPos >= nMatchOffset &&
if (nRepPos >= nStartOffset &&
(nRepPos + nRepLen) <= nEndOffset) {
if (visited[nRepPos].offset != nMatchOffset || visited[nRepPos].length > nRepLen) {
visited[nRepPos].offset = nMatchOffset;
visited[nRepPos].length = 0;
visited[nRepPos].offset = nMatchOffset;
if (pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2) + NMATCHES_PER_INDEX_V2 - 1].length == 0) {
if (!memcmp(pInWindow + nRepPos, pInWindow + nRepPos - nMatchOffset, 2)) {
if (nRepLen >= MIN_MATCH_SIZE_V2) {
const int nLen0 = rle_len[nRepPos - nMatchOffset];
const int nLen1 = rle_len[nRepPos];
const int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
if (nMinLen >= nRepLen || !memcmp(pInWindow + nRepPos + nMinLen, pInWindow + nRepPos + nMinLen - nMatchOffset, nRepLen - nMinLen)) {
if (nRepOffset) {
lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
int r;
if (fwd_match[NMATCHES_PER_INDEX_V2 - 1].length == 0) {
if (nRepPos >= nMatchOffset) {
const unsigned char* pInWindowStart = pInWindow + nRepPos;
for (r = 0; fwd_match[r].length; r++) {
if (fwd_match[r].offset == nMatchOffset) {
r = -1;
break;
if (!memcmp(pInWindowStart, pInWindowStart - nMatchOffset, 2)) {
if (nRepLen >= MIN_MATCH_SIZE_V2) {
const int nLen0 = rle_len[nRepPos - nMatchOffset];
const int nLen1 = rle_len[nRepPos];
const int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
if (nMinLen >= nRepLen || !memcmp(pInWindowStart + nMinLen, pInWindowStart + nMinLen - nMatchOffset, nRepLen - nMinLen)) {
if (nRepOffset) {
int r;
for (r = 0; fwd_match[r].length; r++) {
if (fwd_match[r].offset == nMatchOffset) {
r = -1;
break;
}
}
if (r != -1) {
int nMaxRepLen = nEndOffset - nRepPos;
if (nMaxRepLen > LCP_MAX)
nMaxRepLen = LCP_MAX;
const int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen;
const unsigned char* pInWindowMax = pInWindowStart + nMaxRepLen;
const unsigned char* pInWindowAtRepPos = pInWindowStart + nCurRepLen;
if (pInWindowAtRepPos > pInWindowMax)
pInWindowAtRepPos = pInWindowMax;
while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8))
pInWindowAtRepPos += 8;
while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4))
pInWindowAtRepPos += 4;
while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset])
pInWindowAtRepPos++;
fwd_match[r].length = (const unsigned int)(pInWindowAtRepPos - pInWindowStart);
fwd_match[r].offset = nMatchOffset;
if (nDepth < 9)
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1);
}
}
if (r != -1) {
int nMaxRepLen = nEndOffset - nRepPos;
if (nMaxRepLen > LCP_MAX)
nMaxRepLen = LCP_MAX;
int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen;
if (nCurRepLen > nMaxRepLen)
nCurRepLen = nMaxRepLen;
const unsigned char* pInWindowMax = pInWindow + nRepPos + nMaxRepLen;
const unsigned char* pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen;
while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8))
pInWindowAtRepPos += 8;
while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4))
pInWindowAtRepPos += 4;
while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset])
pInWindowAtRepPos++;
nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos));
fwd_match[r].offset = nMatchOffset;
fwd_match[r].length = nCurRepLen;
if (nDepth < 9)
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1);
}
}
}
else {
visited[nRepPos].length = nRepLen;
else {
visited[nRepPos].length = nRepLen;
}
}
}
}
@ -281,7 +288,7 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
*/
static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int *rle_len = (int*)pCompressor->intervals /* reuse */;
const int *rle_len = (const int*)pCompressor->intervals /* reuse */;
lzsa_match *visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
char *nRepSlotHandledMask = pCompressor->rep_slot_handled_mask;
char *nRepLenHandledMask = pCompressor->rep_len_handled_mask;
@ -311,7 +318,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
for (i = nStartOffset; i != nEndOffset; i++) {
lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
lzsa_arrival *pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT];
lzsa_arrival *pDestLiteralSlots = &cur_arrival[NARRIVALS_PER_POSITION_V2_MAX];
int j, m;
for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) {
@ -319,6 +326,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
const int nScore = cur_arrival[j].score + 1 - nDisableScore;
const int nNumLiterals = cur_arrival[j].num_literals + 1;
const int nRepOffset = cur_arrival[j].rep_offset;
if (nNumLiterals == 1) {
nCodingChoiceCost += nModeSwitchPenalty;
@ -334,8 +342,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
}
if (nCodingChoiceCost < pDestLiteralSlots[nArrivalsPerPosition - 1].cost ||
(nCodingChoiceCost == pDestLiteralSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestLiteralSlots[nArrivalsPerPosition - 1].score))) {
const int nRepOffset = cur_arrival[j].rep_offset;
(nCodingChoiceCost == pDestLiteralSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestLiteralSlots[nArrivalsPerPosition - 1].score) &&
nRepOffset != pDestLiteralSlots[nArrivalsPerPosition - 1].rep_offset)) {
int exists = 0, n;
for (n = 0;
@ -359,21 +367,19 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (!exists) {
if (n < nArrivalsPerPosition) {
int nn;
int z;
for (nn = n;
nn < nArrivalsPerPosition && pDestLiteralSlots[nn].cost == nCodingChoiceCost;
nn++) {
if (pDestLiteralSlots[nn].rep_offset == nRepOffset) {
for (z = n;
z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].cost == nCodingChoiceCost;
z++) {
if (pDestLiteralSlots[z].rep_offset == nRepOffset) {
exists = 1;
break;
}
}
if (!exists) {
int z;
for (z = n; z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].from_slot; z++) {
for (; z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].from_slot; z++) {
if (pDestLiteralSlots[z].rep_offset == nRepOffset)
break;
}
@ -384,14 +390,14 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_arrival* pDestArrival = &pDestLiteralSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->rep_offset = nRepOffset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i;
pDestArrival->rep_len = cur_arrival[j].rep_len;
pDestArrival->match_len = 0;
pDestArrival->rep_pos = cur_arrival[j].rep_pos;
pDestArrival->num_literals = nNumLiterals;
pDestArrival->score = nScore + nDisableScore;
pDestArrival->rep_offset = nRepOffset;
pDestArrival->rep_pos = cur_arrival[j].rep_pos;
pDestArrival->rep_len = cur_arrival[j].rep_len;
}
}
}
@ -435,7 +441,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
pInWindowAtPos += 4;
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0])
pInWindowAtPos++;
const int nCurRepLen = (int)(pInWindowAtPos - pInWindowStart);
const int nCurRepLen = (const int)(pInWindowAtPos - pInWindowStart);
if (nMaxOverallRepLen < nCurRepLen)
nMaxOverallRepLen = nCurRepLen;
@ -516,7 +522,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
const int nCodingChoiceCost = nMatchLenCost + nNoRepmatchOffsetCost;
if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 2].cost ||
(nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nNoRepmatchScore < (pDestSlots[nArrivalsPerPosition - 2].score))) {
(nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nNoRepmatchScore < (pDestSlots[nArrivalsPerPosition - 2].score) &&
(nCodingChoiceCost != pDestSlots[nArrivalsPerPosition - 1].cost || nMatchOffset != pDestSlots[nArrivalsPerPosition - 1].rep_offset))) {
int exists = 0, n;
for (n = 0;
@ -540,33 +547,21 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (!exists) {
if (n < nArrivalsPerPosition - 1) {
int nn;
int z;
if (!nInsertForwardReps || pDestSlots[nArrivalsPerPosition - 1].from_slot) {
for (nn = n;
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
nn++) {
if (pDestSlots[nn].rep_offset == nMatchOffset) {
for (z = n;
z < nArrivalsPerPosition - 1 && pDestSlots[z].cost == nCodingChoiceCost;
z++) {
if (pDestSlots[z].rep_offset == nMatchOffset) {
if (!nInsertForwardReps || pDestSlots[nArrivalsPerPosition - 1].from_slot || pDestSlots[z].rep_pos >= i) {
exists = 1;
break;
}
}
}
else {
for (nn = n;
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
nn++) {
if (pDestSlots[nn].rep_offset == nMatchOffset && pDestSlots[nn].rep_pos >= i) {
exists = 1;
break;
}
break;
}
}
if (!exists) {
int z;
for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
for (; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
if (pDestSlots[z].rep_offset == nMatchOffset)
break;
}
@ -577,14 +572,14 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_arrival* pDestArrival = &pDestSlots[n];
pDestArrival->cost = nCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->rep_offset = nMatchOffset;
pDestArrival->from_slot = nNonRepMatchArrivalIdx + 1;
pDestArrival->from_pos = i;
pDestArrival->rep_len = k;
pDestArrival->match_len = k;
pDestArrival->rep_pos = i;
pDestArrival->num_literals = 0;
pDestArrival->score = nNoRepmatchScore + nDisableScore;
pDestArrival->rep_offset = nMatchOffset;
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
}
}
@ -608,76 +603,76 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
const int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
const int nScore = cur_arrival[j].score + 2 - nDisableScore;
const int nRepOffset = cur_arrival[j].rep_offset;
if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
(nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) {
const int nRepOffset = cur_arrival[j].rep_offset;
int exists = 0, n;
if (nRepOffset != pDestSlots[nArrivalsPerPosition - 1].rep_offset) {
if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
(nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) {
int exists = 0, n;
for (n = 0;
pDestSlots[n].cost < nRepCodingChoiceCost;
n++) {
if (pDestSlots[n].rep_offset == nRepOffset) {
exists = 1;
if (!nReduce)
nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7);
break;
}
}
if (!exists) {
for (;
n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score);
for (n = 0;
pDestSlots[n].cost < nRepCodingChoiceCost;
n++) {
if (pDestSlots[n].rep_offset == nRepOffset) {
exists = 1;
if (!nReduce)
nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7);
break;
}
}
if (!exists) {
if (n < nArrivalsPerPosition) {
int nn;
for (nn = n;
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost;
nn++) {
if (pDestSlots[nn].rep_offset == nRepOffset) {
exists = 1;
break;
}
for (;
n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score);
n++) {
if (pDestSlots[n].rep_offset == nRepOffset) {
exists = 1;
break;
}
}
if (!exists) {
if (!exists) {
if (n < nArrivalsPerPosition) {
int z;
for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
if (pDestSlots[z].rep_offset == nRepOffset)
for (z = n;
z < nArrivalsPerPosition - 1 && pDestSlots[z].cost == nRepCodingChoiceCost;
z++) {
if (pDestSlots[z].rep_offset == nRepOffset) {
exists = 1;
break;
}
}
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (z - n));
if (!exists) {
for (; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
if (pDestSlots[z].rep_offset == nRepOffset)
break;
}
lzsa_arrival* pDestArrival = &pDestSlots[n];
pDestArrival->cost = nRepCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore + nDisableScore;
pDestArrival->rep_offset = nRepOffset;
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
memmove(&pDestSlots[n + 1],
&pDestSlots[n],
sizeof(lzsa_arrival) * (z - n));
lzsa_arrival* pDestArrival = &pDestSlots[n];
pDestArrival->cost = nRepCodingChoiceCost;
pDestArrival->rep_offset = nRepOffset;
pDestArrival->from_slot = j + 1;
pDestArrival->from_pos = i;
pDestArrival->rep_len = k;
pDestArrival->match_len = k;
pDestArrival->rep_pos = i;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore + nDisableScore;
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
}
}
}
}
}
}
else {
break;
else {
break;
}
}
}
}
@ -739,8 +734,8 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
i >= pBestMatch[i + 1].offset &&
(i + pBestMatch[i + 1].length + 1) <= nEndOffset &&
!memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) {
int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V2);
int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V2);
const int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V2);
const int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V2);
if ((nReducedLenSize - nCurLenSize) <= 8) {
/* Merge */
@ -829,7 +824,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
if (pBestMatch[nNextIndex].offset != pMatch->offset)
nNextCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16));
int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
const int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize;
/* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */
int nReducedCommandSize = (pMatch->length << 3) + 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals + pMatch->length + nNextLiterals) + /* (nNextLiterals << 3) + */ lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2);
@ -847,7 +842,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
if (nOriginalCombinedCommandSize >= nReducedCommandSize) {
/* Reduce */
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
int j;
for (j = 0; j < nMatchLen; j++) {
@ -883,34 +878,32 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un
nNextIndex++;
}
int nNextOffset;
if (nNextIndex < nEndOffset)
nNextOffset = pBestMatch[nNextIndex].offset;
else
nNextOffset = 0;
int nCurPartialSize = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2);
nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v2(0) + */ lzsa_get_match_varlen_size_v2(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V2);
if (pBestMatch[i + pMatch->length].offset != pMatch->offset)
nCurPartialSize += (pBestMatch[i + pMatch->length].offset <= 32) ? 4 : ((pBestMatch[i + pMatch->length].offset <= 512) ? 8 : ((pBestMatch[i + pMatch->length].offset <= (8192 + 512)) ? 12 : 16));
if (nNextOffset != pBestMatch[i + pMatch->length].offset)
nCurPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16));
int nReducedPartialSize = lzsa_get_match_varlen_size_v2(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V2);
if (nNextOffset != pMatch->offset)
nReducedPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16));
if (nNextIndex < nEndOffset) {
const int nNextOffset = pBestMatch[nNextIndex].offset;
if (nNextOffset != pBestMatch[i + pMatch->length].offset)
nCurPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16));
if (nNextOffset != pMatch->offset)
nReducedPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16));
}
if (nCurPartialSize >= nReducedPartialSize) {
int nMatchLen = pMatch->length;
const int nMatchLen = pMatch->length;
/* Join */
pMatch->length += pBestMatch[i + nMatchLen].length;
pBestMatch[i + nMatchLen].length = 0;
pBestMatch[i + nMatchLen].offset = 0;
pBestMatch[i + nMatchLen].length = -1;
nDidReduce = 1;
continue;
}
@ -1161,7 +1154,7 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const lzsa_match *p
static int lzsa_write_raw_uncompressed_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
int nCurNibbleOffset = -1;
int nNumLiterals = nEndOffset - nStartOffset;
int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals;
int nOutOffset = 0;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + 4 + 8;
@ -1223,7 +1216,8 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
i = 0;
while (i < nEndOffset) {
int nRangeStartIdx = i;
unsigned char c = pInWindow[nRangeStartIdx];
const unsigned char c = pInWindow[nRangeStartIdx];
do {
i++;
} while (i < nEndOffset && pInWindow[i] == c);
@ -1257,6 +1251,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
}
for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) {
const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition);
lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2);
int m = 0, nInserted = 0;
int nMatchPos;
@ -1265,9 +1260,9 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
m++;
for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 15 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
int nMatchOffset = nPosition - nMatchPos;
int nExistingMatchIdx;
const int nMatchOffset = nPosition - nMatchPos;
int nAlreadyExists = 0;
int nExistingMatchIdx;
for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) {
if (match[nExistingMatchIdx].offset == nMatchOffset) {
@ -1278,9 +1273,11 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
if (!nAlreadyExists) {
int nMatchLen = 2;
while (nMatchLen < 16 && (nPosition + nMatchLen + 4) < nEndOffset && !memcmp(pInWindow + nMatchPos + nMatchLen, pInWindow + nPosition + nMatchLen, 4))
while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8))
nMatchLen += 8;
while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4))
nMatchLen += 4;
while (nMatchLen < 16 && (nPosition + nMatchLen) < nEndOffset && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen])
nMatchLen++;
match[m].length = nMatchLen;
match[m].offset = nMatchOffset;
@ -1300,6 +1297,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2);
if (match[0].length < 5) {
const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition);
int m = 0, nInserted = 0;
int nMatchPos;
int nMaxForwardPos = nPosition + 2 + 1 + 2;
@ -1313,7 +1311,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
}
for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 46 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
int nMatchOffset = nPosition - nMatchPos;
const int nMatchOffset = nPosition - nMatchPos;
if (nMatchOffset <= MAX_OFFSET) {
int nAlreadyExists = 0;
@ -1345,7 +1343,11 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
if (nGotMatch) {
int nMatchLen = 2;
while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8))
nMatchLen += 8;
while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4))
nMatchLen += 4;
while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen])
nMatchLen++;
match[m].length = nMatchLen | 0x8000;
match[m].offset = nMatchOffset;
@ -1354,7 +1356,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8);
nInserted++;
if (nInserted >= 3 || m >= 46)
if (nInserted >= 3)
break;
}
}
@ -1371,6 +1373,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2);
if (match[0].length < 8) {
const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition);
int m = 0, nInserted = 0;
int nMatchPos;
int nMaxForwardPos = nPosition + 2 + 1 + 6;
@ -1384,7 +1387,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
}
for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 63 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) {
int nMatchOffset = nPosition - nMatchPos;
const int nMatchOffset = nPosition - nMatchPos;
if (nMatchOffset <= MAX_OFFSET) {
int nAlreadyExists = 0;
@ -1416,7 +1419,12 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
if (nGotMatch) {
int nMatchLen = 2;
while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen])
while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8))
nMatchLen += 8;
while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4))
nMatchLen += 4;
while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen] )
nMatchLen++;
match[m].length = nMatchLen;
@ -1426,7 +1434,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8);
nInserted++;
if (nInserted >= 12 || m >= 63)
if (nInserted >= 12)
break;
}
}
@ -1440,10 +1448,9 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
}
/* Compress optimally and do break ties in favor of less tokens */
memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT);
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT);
pBestMatch = pCompressor->improved_match - nPreviousBlockSize;
pBestMatch = pCompressor->best_match - nPreviousBlockSize;
}
/* Try to reduce final command set, wherever possible */