Ratio increase

This commit is contained in:
Emmanuel Marty 2019-12-15 18:04:16 +01:00 committed by GitHub
parent fc5f540a68
commit e9ca5032bc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 77 additions and 16 deletions

View File

@@ -91,7 +91,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
int nMinMatchSize = pCompressor->min_match_size; int nMinMatchSize = pCompressor->min_match_size;
if (pCompressor->format_version >= 2) { if (pCompressor->format_version >= 2) {
for (i = 1; i < nInWindowSize - 1; i++) { for (i = 1; i < nInWindowSize; i++) {
int nIndex = (int)(intervals[i] & POS_MASK); int nIndex = (int)(intervals[i] & POS_MASK);
int nLen = PLCP[nIndex]; int nLen = PLCP[nIndex];
if (nLen < nMinMatchSize) if (nLen < nMinMatchSize)
@@ -206,6 +206,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
unsigned int super_ref; unsigned int super_ref;
unsigned int match_pos; unsigned int match_pos;
lzsa_match *matchptr; lzsa_match *matchptr;
int nPrevOffset = 0;
/** /**
* Find matches using intervals * Find matches using intervals
@@ -245,19 +246,34 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
int nMatchOffset = (int)(nOffset - match_pos); int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) { if (nMatchOffset <= MAX_OFFSET) {
if (pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)); matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
}
matchptr->offset = (unsigned short)nMatchOffset; matchptr->offset = (unsigned short)nMatchOffset;
matchptr++; matchptr++;
nPrevOffset = nMatchOffset;
} }
} }
} }
for (;;) { for (;;) {
if ((super_ref = pos_data[match_pos]) > ref) {
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
while ((super_ref = pos_data[match_pos]) > ref) while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK; match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG; intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
@@ -266,7 +282,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
if ((matchptr - pMatches) < nMaxMatches) { if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos); int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) { if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
if (pCompressor->format_version >= 2) { if (pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS)); matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
} }
@@ -282,6 +298,23 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
break; break;
ref = super_ref; ref = super_ref;
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK; match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
matchptr->offset = (unsigned short)nMatchOffset;
if ((matchptr->length & 0x7fff) > 2) {
matchptr++;
nPrevOffset = nMatchOffset;
}
}
}
}
} }
return (int)(matchptr - pMatches); return (int)(matchptr - pMatches);

View File

@@ -334,9 +334,13 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2); lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
int nMinRepLen[NMATCHES_PER_ARRIVAL_BIG];
memset(nMinRepLen, 0, nMatchesPerArrival * sizeof(int));
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) { for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
int nMatchLen = match[m].length; int nMatchLen = match[m].length & 0x7fff;
int nMatchOffset = match[m].offset; int nMatchOffset = match[m].offset;
int nScorePenalty = ((match[m].length & 0x8000) >> 15);
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16)); int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
int nStartingMatchLen, k; int nStartingMatchLen, k;
int nMaxRepLen[NMATCHES_PER_ARRIVAL_BIG]; int nMaxRepLen[NMATCHES_PER_ARRIVAL_BIG];
@@ -354,8 +358,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
else { else {
if (i > nRepOffset && if (i > nRepOffset &&
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) { (i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i - nMatchOffset + nCurMaxRepLen]) nCurMaxRepLen = nMinRepLen[j];
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i + nCurMaxRepLen])
nCurMaxRepLen++; nCurMaxRepLen++;
nMinRepLen[j] = nCurMaxRepLen;
} }
} }
} }
@@ -368,13 +374,29 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (nInsertForwardReps) if (nInsertForwardReps)
lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nMatchesPerArrival, 0); lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nMatchesPerArrival, 0);
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) int nMatchLenCost = 0;
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) {
nStartingMatchLen = nMatchLen; nStartingMatchLen = nMatchLen;
else nMatchLenCost = 4 + 24;
}
else {
nStartingMatchLen = nMinMatchSize; nStartingMatchLen = nMinMatchSize;
nMatchLenCost = 0;
}
for (k = nStartingMatchLen; k <= nMatchLen; k++) { for (k = nStartingMatchLen; k <= nMatchLen; k++) {
int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2); if (k == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
nMatchLenCost = 4;
}
else {
if (k == (MATCH_RUN_LEN_V2 + 15 + MIN_MATCH_SIZE_V2))
nMatchLenCost = 4 + 8;
else {
if (k == 256)
nMatchLenCost = 4 + 24;
}
}
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_ARRIVAL_SHIFT]; lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_ARRIVAL_SHIFT];
int nInsertedNoRepMatchCandidate = 0; int nInsertedNoRepMatchCandidate = 0;
@@ -393,7 +415,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (nCodingChoiceCost <= pDestSlots[nMatchesPerArrival - 1].cost) { if (nCodingChoiceCost <= pDestSlots[nMatchesPerArrival - 1].cost) {
int exists = 0; int exists = 0;
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3; int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3 + nScorePenalty;
for (n = 0; for (n = 0;
n < nMatchesPerArrival && pDestSlots[n].cost <= nCodingChoiceCost; n < nMatchesPerArrival && pDestSlots[n].cost <= nCodingChoiceCost;
@@ -501,9 +523,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
} }
} }
} }
else {
break;
} }
} }
} }
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || match[m + 1].length < LCP_MAX))
break;
}
} }
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0]; lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];

View File

@@ -40,7 +40,7 @@ extern "C" {
#endif #endif
#define LCP_BITS 14 #define LCP_BITS 14
#define TAG_BITS 3 #define TAG_BITS 4
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1) #define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1)) #define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (31-LCP_BITS) #define LCP_SHIFT (31-LCP_BITS)
@@ -50,7 +50,7 @@ extern "C" {
#define EXCL_VISITED_MASK 0x7fffffff #define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_ARRIVAL_SMALL 8 #define NMATCHES_PER_ARRIVAL_SMALL 8
#define NMATCHES_PER_ARRIVAL_BIG 23 #define NMATCHES_PER_ARRIVAL_BIG 32
#define MATCHES_PER_ARRIVAL_SHIFT 5 #define MATCHES_PER_ARRIVAL_SHIFT 5
#define NMATCHES_PER_INDEX_V1 8 #define NMATCHES_PER_INDEX_V1 8