Simplify code, compress LZSA2 another 15% faster

This commit is contained in:
Emmanuel Marty 2020-07-29 13:01:24 +02:00 committed by GitHub
parent 33eec56b9b
commit 060f5d3350
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 34 additions and 40 deletions

View File

@@ -212,14 +212,25 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
nPrevPrevRepPos = nPrevRepPos; nPrevPrevRepPos = nPrevRepPos;
nPrevRepPos = nRepPos; nPrevRepPos = nRepPos;
lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
int r;
for (r = 0; r < NMATCHES_PER_INDEX_V2 && fwd_match[r].length >= MIN_MATCH_SIZE_V2; r++) {
if (fwd_match[r].offset == nMatchOffset) {
r = NMATCHES_PER_INDEX_V2;
break;
}
}
if (r < NMATCHES_PER_INDEX_V2) {
int nMaxRepLen = nEndOffset - nRepPos; int nMaxRepLen = nEndOffset - nRepPos;
if (nMaxRepLen > LCP_MAX) if (nMaxRepLen > LCP_MAX)
nMaxRepLen = LCP_MAX; nMaxRepLen = LCP_MAX;
int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen; int nCurRepLen = (nMinLen > nRepLen) ? nMinLen : nRepLen;
if (nCurRepLen > nMaxRepLen) if (nCurRepLen > nMaxRepLen)
nCurRepLen = nMaxRepLen; nCurRepLen = nMaxRepLen;
const unsigned char *pInWindowMax = pInWindow + nRepPos + nMaxRepLen; const unsigned char* pInWindowMax = pInWindow + nRepPos + nMaxRepLen;
const unsigned char *pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen; const unsigned char* pInWindowAtRepPos = pInWindow + nRepPos + nCurRepLen;
while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8)) while ((pInWindowAtRepPos + 8) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 8))
pInWindowAtRepPos += 8; pInWindowAtRepPos += 8;
while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4)) while ((pInWindowAtRepPos + 4) < pInWindowMax && !memcmp(pInWindowAtRepPos, pInWindowAtRepPos - nMatchOffset, 4))
@@ -228,20 +239,6 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
pInWindowAtRepPos++; pInWindowAtRepPos++;
nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos)); nCurRepLen = (int)(pInWindowAtRepPos - (pInWindow + nRepPos));
lzsa_match* fwd_match = pCompressor->match + ((nRepPos - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
int r;
for (r = 0; r < NMATCHES_PER_INDEX_V2 && fwd_match[r].length >= MIN_MATCH_SIZE_V2; r++) {
if (fwd_match[r].offset == nMatchOffset) {
if (fwd_match[r].length < nCurRepLen)
fwd_match[r].length = nCurRepLen;
r = NMATCHES_PER_INDEX_V2;
break;
}
}
if (r < NMATCHES_PER_INDEX_V2) {
fwd_match[r].offset = nMatchOffset; fwd_match[r].offset = nMatchOffset;
fwd_match[r].length = nCurRepLen; fwd_match[r].length = nCurRepLen;
@@ -271,11 +268,10 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) { static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) {
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int* rle_end = (int*)pCompressor->intervals /* reuse */; const int* rle_end = (int*)pCompressor->intervals /* reuse */;
char* rep_inserted = pCompressor->rep_inserted;
const int nModeSwitchPenalty = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 0 : MODESWITCH_PENALTY; const int nModeSwitchPenalty = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 0 : MODESWITCH_PENALTY;
const int nMinMatchSize = pCompressor->min_match_size; const int nMinMatchSize = pCompressor->min_match_size;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE); const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
const int nMaxRepInsertedLen = nReduce ? 64 : 0; const int nMaxRepInsertedLen = nReduce ? LEAVE_ALONE_MATCH_SIZE : 0;
const int nLeaveAloneMatchSize = (nArrivalsPerPosition == NARRIVALS_PER_POSITION_V2_SMALL) ? LEAVE_ALONE_MATCH_SIZE_SMALL : LEAVE_ALONE_MATCH_SIZE; const int nLeaveAloneMatchSize = (nArrivalsPerPosition == NARRIVALS_PER_POSITION_V2_SMALL) ? LEAVE_ALONE_MATCH_SIZE_SMALL : LEAVE_ALONE_MATCH_SIZE;
int i, j, n; int i, j, n;
@@ -380,11 +376,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
} }
lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2); lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
int nNumArrivalsForThisPos = j, nMaxOverallRepLen = 0; int nNumArrivalsForThisPos = j, nMinOverallRepLen = 0, nMaxOverallRepLen = 0;
int nMinRepLen[NARRIVALS_PER_POSITION_V2_BIG]; int nRepLenForArrival[NARRIVALS_PER_POSITION_V2_BIG];
memset(nMinRepLen, 0, nArrivalsPerPosition * sizeof(int)); memset(nRepLenForArrival, 0, nArrivalsPerPosition * sizeof(int));
memset(rep_inserted, 0, NARRIVALS_PER_POSITION_V2_BIG * (64 / 8) * sizeof(char));
for (j = 0; j < nNumArrivalsForThisPos; j++) { for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nRepOffset = cur_arrival[j].rep_offset; int nRepOffset = cur_arrival[j].rep_offset;
@@ -393,19 +388,19 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (i > nRepOffset && if (i > nRepOffset &&
(i + MIN_MATCH_SIZE_V2) <= nEndOffset) { (i + MIN_MATCH_SIZE_V2) <= nEndOffset) {
if (pInWindow[i] == pInWindow[i - nRepOffset]) { if (pInWindow[i] == pInWindow[i - nRepOffset]) {
int nMaxRepLen = nEndOffset - i; int nMaxRepLenForPos = nEndOffset - i;
if (nMaxRepLen > LCP_MAX) if (nMaxRepLenForPos > LCP_MAX)
nMaxRepLen = LCP_MAX; nMaxRepLenForPos = LCP_MAX;
const unsigned char* pInWindowAtPos = pInWindow + i; const unsigned char* pInWindowAtPos = pInWindow + i;
const unsigned char* pInWindowMax = pInWindow + i + nMaxRepLen; const unsigned char* pInWindowMax = pInWindow + i + nMaxRepLenForPos;
int nLen0 = rle_end[i - nRepOffset] - (i - nRepOffset); int nLen0 = rle_end[i - nRepOffset] - (i - nRepOffset);
int nLen1 = rle_end[i] - (i); int nLen1 = rle_end[i] - (i);
int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1; int nMinLen = (nLen0 < nLen1) ? nLen0 : nLen1;
if (nMinLen > nMaxRepLen) if (nMinLen > nMaxRepLenForPos)
nMinLen = nMaxRepLen; nMinLen = nMaxRepLenForPos;
pInWindowAtPos += nMinLen; pInWindowAtPos += nMinLen;
while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 8)) while ((pInWindowAtPos + 8) < pInWindowMax && !memcmp(pInWindowAtPos - nRepOffset, pInWindowAtPos, 8))
@@ -414,10 +409,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
pInWindowAtPos += 4; pInWindowAtPos += 4;
while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0]) while (pInWindowAtPos < pInWindowMax && pInWindowAtPos[-nRepOffset] == pInWindowAtPos[0])
pInWindowAtPos++; pInWindowAtPos++;
nMinRepLen[j] = (int)(pInWindowAtPos - (pInWindow + i)); nRepLenForArrival[j] = (int)(pInWindowAtPos - (pInWindow + i));
if (nMaxOverallRepLen < nMinRepLen[j]) if (nMaxOverallRepLen < nRepLenForArrival[j])
nMaxOverallRepLen = nMinRepLen[j]; nMaxOverallRepLen = nRepLenForArrival[j];
} }
} }
} }
@@ -549,9 +544,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
/* Insert repmatch candidates */ /* Insert repmatch candidates */
if (k <= nMaxOverallRepLen) { if (k > nMinOverallRepLen && k <= nMaxOverallRepLen) {
for (j = 0; j < nNumArrivalsForThisPos; j++) { for (j = 0; j < nNumArrivalsForThisPos; j++) {
if (nMinRepLen[j] >= k && (k >= 64 || (rep_inserted[(j << 3) + (k >> 3)] & (1 << (k & 7))) == 0)) { if (nRepLenForArrival[j] >= k) {
const int nPrevCost = cur_arrival[j].cost & 0x3fffffff; const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost; int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
int nScore = cur_arrival[j].score + 2; int nScore = cur_arrival[j].score + 2;
@@ -619,15 +614,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
} }
} }
} }
if (k < nMaxRepInsertedLen)
rep_inserted[(j << 3) + (k >> 3)] |= (1 << (k & 7));
} }
else { else {
break; break;
} }
} }
} }
if (k < nMaxRepInsertedLen)
nMinOverallRepLen = k;
} }
} }

View File

@@ -129,7 +129,6 @@ typedef struct _lzsa_compressor {
int flags; int flags;
int safe_dist; int safe_dist;
int num_commands; int num_commands;
char rep_inserted[NARRIVALS_PER_POSITION_V2_BIG * (64 / 8)];
lzsa_stats stats; lzsa_stats stats;
} lzsa_compressor; } lzsa_compressor;