diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c index a6a28e4..c4e4697 100644 --- a/src/shrink_block_v1.c +++ b/src/shrink_block_v1.c @@ -145,9 +145,7 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf * * @return cost in bits */ -static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) { - return (nMatchOffset <= 256) ? 8 : 16; -} +#define lzsa_get_offset_cost_v1(__nMatchOffset) (((__nMatchOffset) <= 256) ? 8 : 16) /** * Attempt to pick optimal matches using a forward arrivals parser, so as to produce the smallest possible output that decompresses to the same input @@ -162,60 +160,67 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY; const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE); - int i, j, n; + int i; if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return; - memset(arrival + (nStartOffset << ARRIVALS_PER_POSITION_SHIFT), 0, sizeof(lzsa_arrival) * ((nEndOffset - nStartOffset + 1) << ARRIVALS_PER_POSITION_SHIFT)); + for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) { + int j; + memset(arrival + i, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V2_MAX); + + for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX; j++) + arrival[i + j].cost = 0x40000000; + } + + arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0; arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1; for (i = nStartOffset; i != nEndOffset; i++) { lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT]; - int m; + lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT]; + int j, m; for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) { - int nPrevCost = cur_arrival[j].cost; + const int nPrevCost = cur_arrival[j].cost; int nCodingChoiceCost = nPrevCost + 8 /* literal */; - int nScore = cur_arrival[j].score + 1; - int nNumLiterals = cur_arrival[j].num_literals + 1; - - if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) { - nCodingChoiceCost += 8; - } + const int nScore = cur_arrival[j].score + 1; + const int nNumLiterals = cur_arrival[j].num_literals + 1; + int n; if (nNumLiterals == 1) nCodingChoiceCost += nModeSwitchPenalty; + else if (nNumLiterals == LITERALS_RUN_LEN_V1 || nNumLiterals == 256 || nNumLiterals == 512) { + nCodingChoiceCost += 8; + } - lzsa_arrival *pDestSlots = &arrival[(i + 1) << ARRIVALS_PER_POSITION_SHIFT]; for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; + lzsa_arrival *pDestArrival = &pDestLiteralSlots[n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost < pDestArrival->cost || + if (nCodingChoiceCost < pDestArrival->cost || (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) { - memmove(&arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n + 1], - &arrival[((i + 1) << ARRIVALS_PER_POSITION_SHIFT) + n], + memmove(&pDestLiteralSlots[n + 1], + &pDestLiteralSlots[n], sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1)); pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; + pDestArrival->rep_offset = cur_arrival[j].rep_offset; pDestArrival->from_slot = j + 1; + pDestArrival->from_pos = i; pDestArrival->match_len = 0; pDestArrival->num_literals = nNumLiterals; pDestArrival->score = nScore; - pDestArrival->rep_offset = cur_arrival[j].rep_offset; break; } } } const lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V1); - int nNumArrivalsForThisPos = j; + const int nNumArrivalsForThisPos = j; for (m = 0; m < NMATCHES_PER_INDEX_V1 && match[m].length; m++) { int nMatchLen = match[m].length; - int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset); + const int nMatchOffsetCost = lzsa_get_offset_cost_v1(match[m].offset); int nStartingMatchLen, k; if ((i + nMatchLen) > nEndOffset) @@ -226,14 +231,13 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p else nStartingMatchLen = nMinMatchSize; for (k = nStartingMatchLen; k <= nMatchLen; k++) { - int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1); - + const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1); lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT]; for (j = 0; j < nNumArrivalsForThisPos; j++) { - int nPrevCost = cur_arrival[j].cost; + const int nPrevCost = cur_arrival[j].cost; int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost; - int exists = 0; + int exists = 0, n; if (!cur_arrival[j].num_literals) nCodingChoiceCost += nModeSwitchPenalty; @@ -248,36 +252,45 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p } if (!exists) { - int nScore = cur_arrival[j].score + 5; + const int nScore = cur_arrival[j].score + 5; + int nNonRepMatchIdx = -1; for (n = 0; n < NARRIVALS_PER_POSITION_V1 /* we only need the literals + short match cost + long match cost cases */; n++) { - lzsa_arrival *pDestArrival = &pDestSlots[n]; + lzsa_arrival* pDestArrival = &pDestSlots[n]; - if (pDestArrival->from_slot == 0 || - nCodingChoiceCost < pDestArrival->cost || + if (nCodingChoiceCost < pDestArrival->cost || (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) { - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - n - 1)); - - pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->score = nScore; - pDestArrival->rep_offset = match[m].offset; - j = NARRIVALS_PER_POSITION_V1; + nNonRepMatchIdx = n; break; } } + + if (nNonRepMatchIdx >= 0) { + lzsa_arrival* pDestArrival = &pDestSlots[nNonRepMatchIdx]; + + memmove(&pDestSlots[nNonRepMatchIdx + 1], + &pDestSlots[nNonRepMatchIdx], + sizeof(lzsa_arrival) * (NARRIVALS_PER_POSITION_V1 - nNonRepMatchIdx - 1)); + + pDestArrival->cost = nCodingChoiceCost; + pDestArrival->rep_offset = match[m].offset; + pDestArrival->from_slot = j + 1; + pDestArrival->from_pos = i; + pDestArrival->match_len = k; + pDestArrival->num_literals = 0; + pDestArrival->score = nScore; + break; + } + } + else { + break; } } } } } - lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0]; + const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0]; while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0) { if (end_arrival->from_pos >= nEndOffset) return; @@ -319,8 +332,8 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un i >= pBestMatch[i + 1].offset && (i + pBestMatch[i + 1].length + 1) <= nEndOffset && !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) { - int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1); - int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1); + const int nCurLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V1); + const int nReducedLenSize = lzsa_get_match_varlen_size_v1(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V1); if ((nReducedLenSize - nCurLenSize) <= 8) { /* Merge */ @@ -349,7 +362,7 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNextLiterals)) >= (8 /* token */ + (pMatch->length << 3) + lzsa_get_literals_varlen_size_v1(nNumLiterals + pMatch->length + nNextLiterals))) { /* Reduce */ - int nMatchLen = pMatch->length; + const int nMatchLen = pMatch->length; int j; for (j = 0; j < nMatchLen; j++) { @@ -361,8 +374,8 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un } } - if ((i + pMatch->length) < nEndOffset && pMatch->offset > 0 && pMatch->length >= MIN_MATCH_SIZE_V1 && - pBestMatch[i + pMatch->length].offset > 0 && + if ((i + pMatch->length) < nEndOffset && pMatch->offset && pMatch->length >= MIN_MATCH_SIZE_V1 && + pBestMatch[i + pMatch->length].offset && pBestMatch[i + pMatch->length].length >= MIN_MATCH_SIZE_V1 && (pMatch->length + pBestMatch[i + pMatch->length].length) <= MAX_VARLEN && (i + pMatch->length) >= pMatch->offset && @@ -375,16 +388,16 @@ static int lzsa_optimize_command_count_v1(lzsa_compressor *pCompressor, const un int nCurPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length - MIN_MATCH_SIZE_V1); nCurPartialSize += 8 /* token */ + lzsa_get_literals_varlen_size_v1(0) + ((pBestMatch[i + pMatch->length].offset <= 256) ? 8 : 16) /* match offset */ + lzsa_get_match_varlen_size_v1(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1); - int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1); + const int nReducedPartialSize = lzsa_get_match_varlen_size_v1(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V1); if (nCurPartialSize >= nReducedPartialSize) { - int nMatchLen = pMatch->length; + const int nMatchLen = pMatch->length; /* Join */ pMatch->length += pBestMatch[i + nMatchLen].length; + pBestMatch[i + nMatchLen].length = 0; pBestMatch[i + nMatchLen].offset = 0; - pBestMatch[i + nMatchLen].length = -1; nDidReduce = 1; continue; } @@ -421,11 +434,11 @@ static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match const lzsa_match *pMatch = pBestMatch + i; if (pMatch->length >= MIN_MATCH_SIZE_V1) { - int nMatchOffset = pMatch->offset; - int nMatchLen = pMatch->length; - int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; - int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + const int nMatchOffset = pMatch->offset; + const int nMatchLen = pMatch->length; + const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; + const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); nCompressedSize += nCommandSize; nNumLiterals = 0; @@ -438,7 +451,7 @@ static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match } { - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); + const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); nCompressedSize += nCommandSize; nNumLiterals = 0; @@ -474,13 +487,13 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa const lzsa_match *pMatch = pBestMatch + i; if (pMatch->length >= MIN_MATCH_SIZE_V1) { - int nMatchOffset = pMatch->offset; - int nMatchLen = pMatch->length; - int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; - int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen; - int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); + const int nMatchOffset = pMatch->offset; + const int nMatchLen = pMatch->length; + const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1; + const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + const int nTokenMatchLen = (nEncodedMatchLen >= MATCH_RUN_LEN_V1) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen; + const int nTokenLongOffset = (nMatchOffset <= 256) ? 0x00 : 0x80; + const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (nTokenLongOffset ? 16 : 8) /* match offset */ + lzsa_get_match_varlen_size_v1(nEncodedMatchLen); if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) return -1; @@ -558,8 +571,8 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa } { - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; - int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); + const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3); if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize) return -1; @@ -621,7 +634,7 @@ static int lzsa_write_block_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMa */ static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { int nNumLiterals = nEndOffset - nStartOffset; - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; + const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals; int nOutOffset = 0; int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + 4; diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index f81cb3b..b24ec99 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -197,66 +197,73 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */; int j; - for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX && arrival[j].from_slot; j++) { + for (j = 0; j < NARRIVALS_PER_POSITION_V2_BIG && arrival[j].from_slot; j++) { const int nRepOffset = arrival[j].rep_offset; if (nMatchOffset != nRepOffset) { const int nRepLen = arrival[j].rep_len; const int nRepPos = arrival[j].rep_pos; - if (nRepPos >= nMatchOffset && + if (nRepPos >= nStartOffset && (nRepPos + nRepLen) <= nEndOffset) { if (visited[nRepPos].offset != nMatchOffset || visited[nRepPos].length > nRepLen) { - visited[nRepPos].offset = nMatchOffset; visited[nRepPos].length = 0; + visited[nRepPos].offset = nMatchOffset; - if (pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2) + NMATCHES_PER_INDEX_V2 - 1].length == 0) { - if (!memcmp(pInWindow + nRepPos, pInWindow + nRepPos - nMatchOffset, 2)) { - if (nRepLen >= MIN_MATCH_SIZE_V2) { - const int nLen0 = rle_len[nRepPos - nMatchOffset]; - const int nLen1 = rle_len[nRepPos]; - const int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1; + lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2); - if (nMinLen >= nRepLen || !memcmp(pInWindow + nRepPos + nMinLen, pInWindow + nRepPos + nMinLen - nMatchOffset, nRepLen - nMinLen)) { - if (nRepOffset) { - lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2); - int r; + if (fwd_match[NMATCHES_PER_INDEX_V2 - 1].length == 0) { + if (nRepPos >= nMatchOffset) { + const unsigned char* pInWindowStart = pInWindow + nRepPos; - for (r = 0; fwd_match[r].length; r++) { - if (fwd_match[r].offset == nMatchOffset) { - r = -1; - break; + if (!memcmp(pInWindowStart, pInWindowStart - nMatchOffset, 2)) { + if (nRepLen >= MIN_MATCH_SIZE_V2) { + const int nLen0 = rle_len[nRepPos - nMatchOffset]; + const int nLen1 = rle_len[nRepPos]; + const int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1; + + if (nMinLen >= nRepLen || !memcmp(pInWindowStart + nMinLen, pInWindowStart + nMinLen - nMatchOffset, nRepLen - nMinLen)) { + if (nRepOffset) { + int r; + + for (r = 0; fwd_match[r].length; r++) { + if (fwd_match[r].offset == nMatchOffset) { + r = -1; + break; + } + } + + if (r != -1) { + int nMaxRepLen = nEndOffset - nRepPos; + if (nMaxRepLen > LCP_MAX) + nMaxRepLen = LCP_MAX; + + const int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen; + const unsigned char* pInWindowMax = pInWindowStart + nMaxRepLen; + const unsigned char* pInWindowAtRepPos = pInWindowStart + nCurRepLen; + + if (pInWindowAtRepPos > pInWindowMax) + pInWindowAtRepPos = pInWindowMax; + + while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8)) + pInWindowAtRepPos += 8; + while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4)) + pInWindowAtRepPos += 4; + while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset]) + pInWindowAtRepPos++; + + fwd_match[r].length = (const unsigned int)(pInWindowAtRepPos - pInWindowStart); + fwd_match[r].offset = nMatchOffset; + + if (nDepth < 9) + lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1); } } - - if (r != -1) { - int nMaxRepLen = nEndOffset - nRepPos; - if (nMaxRepLen > LCP_MAX) - nMaxRepLen = LCP_MAX; - int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen; - if (nCurRepLen > nMaxRepLen) - nCurRepLen = nMaxRepLen; - const unsigned char* pInWindowMax = pInWindow + nRepPos + nMaxRepLen; - const unsigned char* pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen; - while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8)) - pInWindowAtRepPos += 8; - while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4)) - pInWindowAtRepPos += 4; - while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset]) - pInWindowAtRepPos++; - - nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos)); - fwd_match[r].offset = nMatchOffset; - fwd_match[r].length = nCurRepLen; - - if (nDepth < 9) - lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1); - } } - } - else { - visited[nRepPos].length = nRepLen; + else { + visited[nRepPos].length = nRepLen; + } } } } @@ -281,7 +288,7 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns */ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) { lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); - const int *rle_len = (int*)pCompressor->intervals /* reuse */; + const int *rle_len = (const int*)pCompressor->intervals /* reuse */; lzsa_match *visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */; char *nRepSlotHandledMask = pCompressor->rep_slot_handled_mask; char *nRepLenHandledMask = pCompressor->rep_len_handled_mask; @@ -311,7 +318,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne for (i = nStartOffset; i != nEndOffset; i++) { lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT]; - lzsa_arrival *pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT]; + lzsa_arrival *pDestLiteralSlots = &cur_arrival[NARRIVALS_PER_POSITION_V2_MAX]; int j, m; for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) { @@ -319,6 +326,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne int nCodingChoiceCost = nPrevCost + 8 /* literal */; const int nScore = cur_arrival[j].score + 1 - nDisableScore; const int nNumLiterals = cur_arrival[j].num_literals + 1; + const int nRepOffset = cur_arrival[j].rep_offset; if (nNumLiterals == 1) { nCodingChoiceCost += nModeSwitchPenalty; @@ -334,8 +342,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } if (nCodingChoiceCost < pDestLiteralSlots[nArrivalsPerPosition - 1].cost || - (nCodingChoiceCost == pDestLiteralSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestLiteralSlots[nArrivalsPerPosition - 1].score))) { - const int nRepOffset = cur_arrival[j].rep_offset; + (nCodingChoiceCost == pDestLiteralSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestLiteralSlots[nArrivalsPerPosition - 1].score) && + nRepOffset != pDestLiteralSlots[nArrivalsPerPosition - 1].rep_offset)) { int exists = 0, n; for (n = 0; @@ -359,21 +367,19 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if (!exists) { if (n < nArrivalsPerPosition) { - int nn; + int z; - for (nn = n; - nn < nArrivalsPerPosition && pDestLiteralSlots[nn].cost == nCodingChoiceCost; - nn++) { - if (pDestLiteralSlots[nn].rep_offset == nRepOffset) { + for (z = n; + z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].cost == nCodingChoiceCost; + z++) { + if (pDestLiteralSlots[z].rep_offset == nRepOffset) { exists = 1; break; } } if (!exists) { - int z; - - for (z = n; z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].from_slot; z++) { + for (; z < nArrivalsPerPosition - 1 && pDestLiteralSlots[z].from_slot; z++) { if (pDestLiteralSlots[z].rep_offset == nRepOffset) break; } @@ -384,14 +390,14 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne lzsa_arrival* pDestArrival = &pDestLiteralSlots[n]; pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; + pDestArrival->rep_offset = nRepOffset; pDestArrival->from_slot = j + 1; + pDestArrival->from_pos = i; + pDestArrival->rep_len = cur_arrival[j].rep_len; pDestArrival->match_len = 0; + pDestArrival->rep_pos = cur_arrival[j].rep_pos; pDestArrival->num_literals = nNumLiterals; pDestArrival->score = nScore + nDisableScore; - pDestArrival->rep_offset = nRepOffset; - pDestArrival->rep_pos = cur_arrival[j].rep_pos; - pDestArrival->rep_len = cur_arrival[j].rep_len; } } } @@ -435,7 +441,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne pInWindowAtPos += 4; while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0]) pInWindowAtPos++; - const int nCurRepLen = (int)(pInWindowAtPos - pInWindowStart); + const int nCurRepLen = (const int)(pInWindowAtPos - pInWindowStart); if (nMaxOverallRepLen < nCurRepLen) nMaxOverallRepLen = nCurRepLen; @@ -516,7 +522,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne const int nCodingChoiceCost = nMatchLenCost + nNoRepmatchOffsetCost; if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 2].cost || - (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nNoRepmatchScore < (pDestSlots[nArrivalsPerPosition - 2].score))) { + (nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 2].cost && nNoRepmatchScore < (pDestSlots[nArrivalsPerPosition - 2].score) && + (nCodingChoiceCost != pDestSlots[nArrivalsPerPosition - 1].cost || nMatchOffset != pDestSlots[nArrivalsPerPosition - 1].rep_offset))) { int exists = 0, n; for (n = 0; @@ -540,33 +547,21 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if (!exists) { if (n < nArrivalsPerPosition - 1) { - int nn; + int z; - if (!nInsertForwardReps || pDestSlots[nArrivalsPerPosition - 1].from_slot) { - for (nn = n; - nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost; - nn++) { - if (pDestSlots[nn].rep_offset == nMatchOffset) { + for (z = n; + z < nArrivalsPerPosition - 1 && pDestSlots[z].cost == nCodingChoiceCost; + z++) { + if (pDestSlots[z].rep_offset == nMatchOffset) { + if (!nInsertForwardReps || pDestSlots[nArrivalsPerPosition - 1].from_slot || pDestSlots[z].rep_pos >= i) { exists = 1; - break; - } - } - } - else { - for (nn = n; - nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost; - nn++) { - if (pDestSlots[nn].rep_offset == nMatchOffset && pDestSlots[nn].rep_pos >= i) { - exists = 1; - break; } + break; } } if (!exists) { - int z; - - for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { + for (; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { if (pDestSlots[z].rep_offset == nMatchOffset) break; } @@ -577,14 +572,14 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne lzsa_arrival* pDestArrival = &pDestSlots[n]; pDestArrival->cost = nCodingChoiceCost; - pDestArrival->from_pos = i; + pDestArrival->rep_offset = nMatchOffset; pDestArrival->from_slot = nNonRepMatchArrivalIdx + 1; + pDestArrival->from_pos = i; + pDestArrival->rep_len = k; pDestArrival->match_len = k; + pDestArrival->rep_pos = i; pDestArrival->num_literals = 0; pDestArrival->score = nNoRepmatchScore + nDisableScore; - pDestArrival->rep_offset = nMatchOffset; - pDestArrival->rep_pos = i; - pDestArrival->rep_len = k; nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7)); } } @@ -608,76 +603,76 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne const int nPrevCost = cur_arrival[j].cost & 0x3fffffff; const int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost; const int nScore = cur_arrival[j].score + 2 - nDisableScore; + const int nRepOffset = cur_arrival[j].rep_offset; - if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost || - (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) { - const int nRepOffset = cur_arrival[j].rep_offset; - int exists = 0, n; + if (nRepOffset != pDestSlots[nArrivalsPerPosition - 1].rep_offset) { + if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost || + (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score))) { + int exists = 0, n; - for (n = 0; - pDestSlots[n].cost < nRepCodingChoiceCost; - n++) { - if (pDestSlots[n].rep_offset == nRepOffset) { - exists = 1; - if (!nReduce) - nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7); - break; - } - } - - if (!exists) { - for (; - n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score); + for (n = 0; + pDestSlots[n].cost < nRepCodingChoiceCost; n++) { if (pDestSlots[n].rep_offset == nRepOffset) { exists = 1; + if (!nReduce) + nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7); break; } } if (!exists) { - if (n < nArrivalsPerPosition) { - int nn; - - for (nn = n; - nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost; - nn++) { - if (pDestSlots[nn].rep_offset == nRepOffset) { - exists = 1; - break; - } + for (; + n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score); + n++) { + if (pDestSlots[n].rep_offset == nRepOffset) { + exists = 1; + break; } + } - if (!exists) { + if (!exists) { + if (n < nArrivalsPerPosition) { int z; - for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { - if (pDestSlots[z].rep_offset == nRepOffset) + for (z = n; + z < nArrivalsPerPosition - 1 && pDestSlots[z].cost == nRepCodingChoiceCost; + z++) { + if (pDestSlots[z].rep_offset == nRepOffset) { + exists = 1; break; + } } - memmove(&pDestSlots[n + 1], - &pDestSlots[n], - sizeof(lzsa_arrival) * (z - n)); + if (!exists) { + for (; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { + if (pDestSlots[z].rep_offset == nRepOffset) + break; + } - lzsa_arrival* pDestArrival = &pDestSlots[n]; - pDestArrival->cost = nRepCodingChoiceCost; - pDestArrival->from_pos = i; - pDestArrival->from_slot = j + 1; - pDestArrival->match_len = k; - pDestArrival->num_literals = 0; - pDestArrival->score = nScore + nDisableScore; - pDestArrival->rep_offset = nRepOffset; - pDestArrival->rep_pos = i; - pDestArrival->rep_len = k; - nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7)); + memmove(&pDestSlots[n + 1], + &pDestSlots[n], + sizeof(lzsa_arrival) * (z - n)); + + lzsa_arrival* pDestArrival = &pDestSlots[n]; + pDestArrival->cost = nRepCodingChoiceCost; + pDestArrival->rep_offset = nRepOffset; + pDestArrival->from_slot = j + 1; + pDestArrival->from_pos = i; + pDestArrival->rep_len = k; + pDestArrival->match_len = k; + pDestArrival->rep_pos = i; + pDestArrival->num_literals = 0; + pDestArrival->score = nScore + nDisableScore; + nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7)); + } } } } } - } - else { - break; + else { + break; + } } } } @@ -739,8 +734,8 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un i >= pBestMatch[i + 1].offset && (i + pBestMatch[i + 1].length + 1) <= nEndOffset && !memcmp(pInWindow + i - (pBestMatch[i + 1].offset), pInWindow + i, pBestMatch[i + 1].length + 1)) { - int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V2); - int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V2); + const int nCurLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length - MIN_MATCH_SIZE_V2); + const int nReducedLenSize = lzsa_get_match_varlen_size_v2(pBestMatch[i + 1].length + 1 - MIN_MATCH_SIZE_V2); if ((nReducedLenSize - nCurLenSize) <= 8) { /* Merge */ @@ -829,7 +824,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un if (pBestMatch[nNextIndex].offset != pMatch->offset) nNextCommandSize += (pBestMatch[nNextIndex].offset <= 32) ? 4 : ((pBestMatch[nNextIndex].offset <= 512) ? 8 : ((pBestMatch[nNextIndex].offset <= (8192 + 512)) ? 12 : 16)); - int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize; + const int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize; /* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */ int nReducedCommandSize = (pMatch->length << 3) + 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals + pMatch->length + nNextLiterals) + /* (nNextLiterals << 3) + */ lzsa_get_match_varlen_size_v2(pBestMatch[nNextIndex].length - MIN_MATCH_SIZE_V2); @@ -847,7 +842,7 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un if (nOriginalCombinedCommandSize >= nReducedCommandSize) { /* Reduce */ - int nMatchLen = pMatch->length; + const int nMatchLen = pMatch->length; int j; for (j = 0; j < nMatchLen; j++) { @@ -883,34 +878,32 @@ static int lzsa_optimize_command_count_v2(lzsa_compressor *pCompressor, const un nNextIndex++; } - int nNextOffset; - if (nNextIndex < nEndOffset) - nNextOffset = pBestMatch[nNextIndex].offset; - else - nNextOffset = 0; - int nCurPartialSize = lzsa_get_match_varlen_size_v2(pMatch->length - MIN_MATCH_SIZE_V2); nCurPartialSize += 8 /* token */ + /* lzsa_get_literals_varlen_size_v2(0) + */ lzsa_get_match_varlen_size_v2(pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V2); if (pBestMatch[i + pMatch->length].offset != pMatch->offset) nCurPartialSize += (pBestMatch[i + pMatch->length].offset <= 32) ? 4 : ((pBestMatch[i + pMatch->length].offset <= 512) ? 8 : ((pBestMatch[i + pMatch->length].offset <= (8192 + 512)) ? 12 : 16)); - if (nNextOffset != pBestMatch[i + pMatch->length].offset) - nCurPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16)); - int nReducedPartialSize = lzsa_get_match_varlen_size_v2(pMatch->length + pBestMatch[i + pMatch->length].length - MIN_MATCH_SIZE_V2); - if (nNextOffset != pMatch->offset) - nReducedPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16)); + if (nNextIndex < nEndOffset) { + const int nNextOffset = pBestMatch[nNextIndex].offset; + + if (nNextOffset != pBestMatch[i + pMatch->length].offset) + nCurPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16)); + + if (nNextOffset != pMatch->offset) + nReducedPartialSize += (nNextOffset <= 32) ? 4 : ((nNextOffset <= 512) ? 8 : ((nNextOffset <= (8192 + 512)) ? 12 : 16)); + } if (nCurPartialSize >= nReducedPartialSize) { - int nMatchLen = pMatch->length; + const int nMatchLen = pMatch->length; /* Join */ pMatch->length += pBestMatch[i + nMatchLen].length; + pBestMatch[i + nMatchLen].length = 0; pBestMatch[i + nMatchLen].offset = 0; - pBestMatch[i + nMatchLen].length = -1; nDidReduce = 1; continue; } @@ -1161,7 +1154,7 @@ static int lzsa_write_block_v2(lzsa_compressor *pCompressor, const lzsa_match *p static int lzsa_write_raw_uncompressed_block_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) { int nCurNibbleOffset = -1; int nNumLiterals = nEndOffset - nStartOffset; - int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals; + const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V2) ? LITERALS_RUN_LEN_V2 : nNumLiterals; int nOutOffset = 0; const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2(nNumLiterals) + (nNumLiterals << 3) + 4 + 8; @@ -1223,7 +1216,8 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne i = 0; while (i < nEndOffset) { int nRangeStartIdx = i; - unsigned char c = pInWindow[nRangeStartIdx]; + const unsigned char c = pInWindow[nRangeStartIdx]; + do { i++; } while (i < nEndOffset && pInWindow[i] == c); @@ -1257,6 +1251,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne } for (nPosition = nPreviousBlockSize + 1; nPosition < (nEndOffset - 1); nPosition++) { + const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition); lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2); int m = 0, nInserted = 0; int nMatchPos; @@ -1265,9 +1260,9 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne m++; for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 15 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) { - int nMatchOffset = nPosition - nMatchPos; - int nExistingMatchIdx; + const int nMatchOffset = nPosition - nMatchPos; int nAlreadyExists = 0; + int nExistingMatchIdx; for (nExistingMatchIdx = 0; nExistingMatchIdx < m; nExistingMatchIdx++) { if (match[nExistingMatchIdx].offset == nMatchOffset) { @@ -1278,9 +1273,11 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne if (!nAlreadyExists) { int nMatchLen = 2; - while (nMatchLen < 16 && (nPosition + nMatchLen + 4) < nEndOffset && !memcmp(pInWindow + nMatchPos + nMatchLen, pInWindow + nPosition + nMatchLen, 4)) + while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8)) + nMatchLen += 8; + while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4)) nMatchLen += 4; - while (nMatchLen < 16 && (nPosition + nMatchLen) < nEndOffset && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen]) + while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen]) nMatchLen++; match[m].length = nMatchLen; match[m].offset = nMatchOffset; @@ -1300,6 +1297,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2); if (match[0].length < 5) { + const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition); int m = 0, nInserted = 0; int nMatchPos; int nMaxForwardPos = nPosition + 2 + 1 + 2; @@ -1313,7 +1311,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne } for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 46 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) { - int nMatchOffset = nPosition - nMatchPos; + const int nMatchOffset = nPosition - nMatchPos; if (nMatchOffset <= MAX_OFFSET) { int nAlreadyExists = 0; @@ -1345,7 +1343,11 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne if (nGotMatch) { int nMatchLen = 2; - while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen]) + while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8)) + nMatchLen += 8; + while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4)) + nMatchLen += 4; + while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen]) nMatchLen++; match[m].length = nMatchLen | 0x8000; match[m].offset = nMatchOffset; @@ -1354,7 +1356,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne lzsa_insert_forward_match_v2(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8); nInserted++; - if (nInserted >= 3 || m >= 46) + if (nInserted >= 3) break; } } @@ -1371,6 +1373,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne lzsa_match* match = pCompressor->match + ((nPosition - nPreviousBlockSize) << MATCHES_PER_INDEX_SHIFT_V2); if (match[0].length < 8) { + const int nMaxMatchLen = ((nPosition + 16) < nEndOffset) ? 16 : (nEndOffset - nPosition); int m = 0, nInserted = 0; int nMatchPos; int nMaxForwardPos = nPosition + 2 + 1 + 6; @@ -1384,7 +1387,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne } for (nMatchPos = next_offset_for_pos[nPosition - nPreviousBlockSize]; m < 63 && nMatchPos >= 0; nMatchPos = next_offset_for_pos[nMatchPos - nPreviousBlockSize]) { - int nMatchOffset = nPosition - nMatchPos; + const int nMatchOffset = nPosition - nMatchPos; if (nMatchOffset <= MAX_OFFSET) { int nAlreadyExists = 0; @@ -1416,7 +1419,12 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne if (nGotMatch) { int nMatchLen = 2; - while (nMatchLen < 16 && nPosition < (nEndOffset - nMatchLen) && pInWindow[nMatchPos + nMatchLen] == pInWindow[nPosition + nMatchLen]) + + while ((nMatchLen + 8) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 8)) + nMatchLen += 8; + while ((nMatchLen + 4) < nMaxMatchLen && !memcmp(pInWindow + nPosition + nMatchLen, pInWindow + nMatchPos + nMatchLen, 4)) + nMatchLen += 4; + while (nMatchLen < nMaxMatchLen && pInWindow[nPosition + nMatchLen] == pInWindow[nMatchPos + nMatchLen] ) nMatchLen++; match[m].length = nMatchLen; @@ -1426,7 +1434,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne lzsa_insert_forward_match_v2(pCompressor, pInWindow, nPosition, nMatchOffset, nPreviousBlockSize, nEndOffset, 8); nInserted++; - if (nInserted >= 12 || m >= 63) + if (nInserted >= 12) break; } } @@ -1440,10 +1448,9 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne } /* Compress optimally and do break ties in favor of less tokens */ - memset(pCompressor->improved_match, 0, BLOCK_SIZE * sizeof(lzsa_match)); - lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->improved_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT); + lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT); - pBestMatch = pCompressor->improved_match - nPreviousBlockSize; + pBestMatch = pCompressor->best_match - nPreviousBlockSize; } /* Try to reduce final command set, wherever possible */