Compress LZSA2 a bit faster again

This commit is contained in:
Emmanuel Marty 2021-10-10 07:52:03 +02:00 committed by GitHub
parent c6a93601cf
commit bb1b4fda14
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

View File

@ -194,61 +194,64 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
for (j = 0; j < NARRIVALS_PER_POSITION_V2_BIG && arrival[j].from_slot; j++) { for (j = 0; j < NARRIVALS_PER_POSITION_V2_BIG && arrival[j].from_slot; j++) {
const int nRepOffset = arrival[j].rep_offset; const int nRepOffset = arrival[j].rep_offset;
if (nMatchOffset != nRepOffset && nRepOffset && arrival[j].rep_len >= MIN_MATCH_SIZE_V2) { if (nMatchOffset != nRepOffset && nRepOffset) {
const int nRepPos = arrival[j].rep_pos;
const int nRepLen = arrival[j].rep_len; const int nRepLen = arrival[j].rep_len;
if (nRepPos > nMatchOffset && if (nRepLen >= MIN_MATCH_SIZE_V2) {
(nRepPos + nRepLen) <= nEndOffset) { const int nRepPos = arrival[j].rep_pos;
if (visited[nRepPos].offset != nMatchOffset || visited[nRepPos].length > nRepLen) { if (nRepPos > nMatchOffset &&
visited[nRepPos].offset = nMatchOffset; (nRepPos + nRepLen) <= nEndOffset) {
visited[nRepPos].length = 0;
if (pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2) + NMATCHES_PER_INDEX_V2 - 1].length == 0) { if (visited[nRepPos].offset != nMatchOffset || visited[nRepPos].length > nRepLen) {
if (!memcmp(pInWindow + nRepPos, pInWindow + nRepPos - nMatchOffset, 2)) { visited[nRepPos].offset = nMatchOffset;
const int nLen0 = rle_len[nRepPos - nMatchOffset]; visited[nRepPos].length = 0;
const int nLen1 = rle_len[nRepPos];
int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
if (nMinLen >= nRepLen || !memcmp(pInWindow + nRepPos + nMinLen, pInWindow + nRepPos + nMinLen - nMatchOffset, nRepLen - nMinLen)) { if (pCompressor->match[((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2) + NMATCHES_PER_INDEX_V2 - 1].length == 0) {
lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2); if (!memcmp(pInWindow + nRepPos, pInWindow + nRepPos - nMatchOffset, 2)) {
int r; const int nLen0 = rle_len[nRepPos - nMatchOffset];
const int nLen1 = rle_len[nRepPos];
int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
for (r = 0; fwd_match[r].length; r++) { if (nMinLen >= nRepLen || !memcmp(pInWindow + nRepPos + nMinLen, pInWindow + nRepPos + nMinLen - nMatchOffset, nRepLen - nMinLen)) {
if (fwd_match[r].offset == nMatchOffset) { lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
r = NMATCHES_PER_INDEX_V2; int r;
break;
for (r = 0; fwd_match[r].length; r++) {
if (fwd_match[r].offset == nMatchOffset) {
r = NMATCHES_PER_INDEX_V2;
break;
}
}
if (r < NMATCHES_PER_INDEX_V2) {
int nMaxRepLen = nEndOffset - nRepPos;
if (nMaxRepLen > LCP_MAX)
nMaxRepLen = LCP_MAX;
int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen;
if (nCurRepLen > nMaxRepLen)
nCurRepLen = nMaxRepLen;
const unsigned char* pInWindowMax = pInWindow + nRepPos + nMaxRepLen;
const unsigned char* pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen;
while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8))
pInWindowAtRepPos += 8;
while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4))
pInWindowAtRepPos += 4;
while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset])
pInWindowAtRepPos++;
nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos));
fwd_match[r].offset = nMatchOffset;
fwd_match[r].length = nCurRepLen;
if (nDepth < 9)
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1);
} }
} }
else {
if (r < NMATCHES_PER_INDEX_V2) { visited[nRepPos].length = nRepLen;
int nMaxRepLen = nEndOffset - nRepPos;
if (nMaxRepLen > LCP_MAX)
nMaxRepLen = LCP_MAX;
int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen;
if (nCurRepLen > nMaxRepLen)
nCurRepLen = nMaxRepLen;
const unsigned char* pInWindowMax = pInWindow + nRepPos + nMaxRepLen;
const unsigned char* pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen;
while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8))
pInWindowAtRepPos += 8;
while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4))
pInWindowAtRepPos += 4;
while (pInWindowAtRepPos < pInWindowMax && pInWindowAtRepPos[0] == pInWindowAtRepPos[-nMatchOffset])
pInWindowAtRepPos++;
nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos));
fwd_match[r].offset = nMatchOffset;
fwd_match[r].length = nCurRepLen;
if (nDepth < 9)
lzsa_insert_forward_match_v2(pCompressor, pInWindow, nRepPos, nMatchOffset, nStartOffset, nEndOffset, nDepth + 1);
} }
} }
else {
visited[nRepPos].length = nRepLen;
}
} }
} }
} }
@ -525,14 +528,24 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (n < nArrivalsPerPosition - 1) { if (n < nArrivalsPerPosition - 1) {
int nn; int nn;
for (nn = n; if (!nInsertForwardReps || pDestSlots[nArrivalsPerPosition - 1].from_slot) {
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost; for (nn = n;
nn++) { nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
if (pDestSlots[nn].rep_offset == nMatchOffset && nn++) {
(!nInsertForwardReps || pDestSlots[nn].rep_pos >= i || if (pDestSlots[nn].rep_offset == nMatchOffset) {
pDestSlots[nArrivalsPerPosition - 1].from_slot)) { exists = 1;
exists = 1; break;
break; }
}
}
else {
for (nn = n;
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nCodingChoiceCost;
nn++) {
if (pDestSlots[nn].rep_offset == nMatchOffset && pDestSlots[nn].rep_pos >= i) {
exists = 1;
break;
}
} }
} }
@ -561,7 +574,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
pDestArrival->rep_offset = nMatchOffset; pDestArrival->rep_offset = nMatchOffset;
pDestArrival->rep_pos = i; pDestArrival->rep_pos = i;
pDestArrival->rep_len = k; pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~(1 << (k & 7)); nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
} }
} }
} }
@ -577,81 +590,84 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
nRepLenHandledMask[k >> 3] |= 1 << (k & 7); nRepLenHandledMask[k >> 3] |= 1 << (k & 7);
for (nCurRepMatchArrival = 0; (j = nRepMatchArrivalIdxAndLen[nCurRepMatchArrival]) >= 0; nCurRepMatchArrival += 2) { for (nCurRepMatchArrival = 0; (j = nRepMatchArrivalIdxAndLen[nCurRepMatchArrival]) >= 0; nCurRepMatchArrival += 2) {
int nMaskOffset = (j << 7) + (k >> 3); if (nRepMatchArrivalIdxAndLen[nCurRepMatchArrival + 1] >= k) {
if (nRepMatchArrivalIdxAndLen[nCurRepMatchArrival + 1] >= k && (nReduce || !(nRepSlotHandledMask[nMaskOffset] & (1 << (k & 7))))) { int nMaskOffset = (j << 7) + (k >> 3);
const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
int nScore = cur_arrival[j].score + 2;
if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost || if (nReduce || !(nRepSlotHandledMask[nMaskOffset] & (1 << (k & 7)))) {
(nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score + nDisableScore))) { const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
int nRepOffset = cur_arrival[j].rep_offset; int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
int exists = 0; int nScore = cur_arrival[j].score + 2;
for (n = 0; if (nRepCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
n < nArrivalsPerPosition && pDestSlots[n].cost < nRepCodingChoiceCost; (nRepCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score + nDisableScore))) {
n++) { int nRepOffset = cur_arrival[j].rep_offset;
if (pDestSlots[n].rep_offset == nRepOffset) { int exists = 0;
exists = 1;
if (!nReduce)
nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7);
break;
}
}
if (!exists) { for (n = 0;
for (; n < nArrivalsPerPosition && pDestSlots[n].cost < nRepCodingChoiceCost;
n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score + nDisableScore);
n++) { n++) {
if (pDestSlots[n].rep_offset == nRepOffset) { if (pDestSlots[n].rep_offset == nRepOffset) {
exists = 1; exists = 1;
if (!nReduce)
nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7);
break; break;
} }
} }
if (!exists) { if (!exists) {
if (n < nArrivalsPerPosition) { for (;
int nn; n < nArrivalsPerPosition && pDestSlots[n].cost == nRepCodingChoiceCost && nScore >= (pDestSlots[n].score + nDisableScore);
n++) {
for (nn = n; if (pDestSlots[n].rep_offset == nRepOffset) {
nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost; exists = 1;
nn++) { break;
if (pDestSlots[nn].rep_offset == nRepOffset) {
exists = 1;
break;
}
} }
}
if (!exists) { if (!exists) {
int z; if (n < nArrivalsPerPosition) {
int nn;
for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) { for (nn = n;
if (pDestSlots[z].rep_offset == nRepOffset) nn < nArrivalsPerPosition && pDestSlots[nn].cost == nRepCodingChoiceCost;
nn++) {
if (pDestSlots[nn].rep_offset == nRepOffset) {
exists = 1;
break; break;
}
} }
memmove(&pDestSlots[n + 1], if (!exists) {
&pDestSlots[n], int z;
sizeof(lzsa_arrival) * (z - n));
lzsa_arrival* pDestArrival = &pDestSlots[n]; for (z = n; z < nArrivalsPerPosition - 1 && pDestSlots[z].from_slot; z++) {
pDestArrival->cost = nRepCodingChoiceCost; if (pDestSlots[z].rep_offset == nRepOffset)
pDestArrival->from_pos = i; break;
pDestArrival->from_slot = j + 1; }
pDestArrival->match_len = k;
pDestArrival->num_literals = 0; memmove(&pDestSlots[n + 1],
pDestArrival->score = nScore; &pDestSlots[n],
pDestArrival->rep_offset = nRepOffset; sizeof(lzsa_arrival) * (z - n));
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k; lzsa_arrival* pDestArrival = &pDestSlots[n];
nRepLenHandledMask[k >> 3] &= ~(1 << (k & 7)); pDestArrival->cost = nRepCodingChoiceCost;
pDestArrival->from_pos = i;
pDestArrival->from_slot = j + 1;
pDestArrival->match_len = k;
pDestArrival->num_literals = 0;
pDestArrival->score = nScore;
pDestArrival->rep_offset = nRepOffset;
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
}
} }
} }
} }
} }
} else {
else { break;
break; }
} }
} }
} }