mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2024-11-22 21:32:07 +00:00
Ratio increase
This commit is contained in:
parent
fc5f540a68
commit
e9ca5032bc
@@ -91,7 +91,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
|
||||
int nMinMatchSize = pCompressor->min_match_size;
|
||||
|
||||
if (pCompressor->format_version >= 2) {
|
||||
for (i = 1; i < nInWindowSize - 1; i++) {
|
||||
for (i = 1; i < nInWindowSize; i++) {
|
||||
int nIndex = (int)(intervals[i] & POS_MASK);
|
||||
int nLen = PLCP[nIndex];
|
||||
if (nLen < nMinMatchSize)
|
||||
@@ -206,6 +206,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
unsigned int super_ref;
|
||||
unsigned int match_pos;
|
||||
lzsa_match *matchptr;
|
||||
int nPrevOffset = 0;
|
||||
|
||||
/**
|
||||
* Find matches using intervals
|
||||
@@ -245,19 +246,34 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
int nMatchOffset = (int)(nOffset - match_pos);
|
||||
|
||||
if (nMatchOffset <= MAX_OFFSET) {
|
||||
if (pCompressor->format_version >= 2) {
|
||||
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
||||
}
|
||||
else {
|
||||
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
|
||||
}
|
||||
matchptr->offset = (unsigned short)nMatchOffset;
|
||||
matchptr++;
|
||||
|
||||
nPrevOffset = nMatchOffset;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (;;) {
|
||||
if ((super_ref = pos_data[match_pos]) > ref) {
|
||||
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||
|
||||
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
|
||||
if ((matchptr - pMatches) < nMaxMatches) {
|
||||
int nMatchOffset = (int)(nOffset - match_pos);
|
||||
|
||||
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
||||
matchptr->offset = (unsigned short)nMatchOffset;
|
||||
matchptr++;
|
||||
|
||||
nPrevOffset = nMatchOffset;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while ((super_ref = pos_data[match_pos]) > ref)
|
||||
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
|
||||
@@ -266,7 +282,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
if ((matchptr - pMatches) < nMaxMatches) {
|
||||
int nMatchOffset = (int)(nOffset - match_pos);
|
||||
|
||||
if (nMatchOffset <= MAX_OFFSET) {
|
||||
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||
if (pCompressor->format_version >= 2) {
|
||||
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
||||
}
|
||||
@@ -282,6 +298,23 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
||||
break;
|
||||
ref = super_ref;
|
||||
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||
|
||||
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
|
||||
if ((matchptr - pMatches) < nMaxMatches) {
|
||||
int nMatchOffset = (int)(nOffset - match_pos);
|
||||
|
||||
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
||||
matchptr->offset = (unsigned short)nMatchOffset;
|
||||
|
||||
if ((matchptr->length & 0x7fff) > 2) {
|
||||
matchptr++;
|
||||
|
||||
nPrevOffset = nMatchOffset;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return (int)(matchptr - pMatches);
|
||||
|
@@ -334,9 +334,13 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
|
||||
lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
|
||||
|
||||
int nMinRepLen[NMATCHES_PER_ARRIVAL_BIG];
|
||||
memset(nMinRepLen, 0, nMatchesPerArrival * sizeof(int));
|
||||
|
||||
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
|
||||
int nMatchLen = match[m].length;
|
||||
int nMatchLen = match[m].length & 0x7fff;
|
||||
int nMatchOffset = match[m].offset;
|
||||
int nScorePenalty = ((match[m].length & 0x8000) >> 15);
|
||||
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
|
||||
int nStartingMatchLen, k;
|
||||
int nMaxRepLen[NMATCHES_PER_ARRIVAL_BIG];
|
||||
@@ -354,8 +358,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
else {
|
||||
if (i > nRepOffset &&
|
||||
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
|
||||
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i - nMatchOffset + nCurMaxRepLen])
|
||||
nCurMaxRepLen = nMinRepLen[j];
|
||||
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i + nCurMaxRepLen])
|
||||
nCurMaxRepLen++;
|
||||
nMinRepLen[j] = nCurMaxRepLen;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -368,13 +374,29 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
if (nInsertForwardReps)
|
||||
lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nMatchesPerArrival, 0);
|
||||
|
||||
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
|
||||
int nMatchLenCost = 0;
|
||||
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) {
|
||||
nStartingMatchLen = nMatchLen;
|
||||
else
|
||||
nMatchLenCost = 4 + 24;
|
||||
}
|
||||
else {
|
||||
nStartingMatchLen = nMinMatchSize;
|
||||
nMatchLenCost = 0;
|
||||
}
|
||||
|
||||
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
||||
int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
|
||||
if (k == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
|
||||
nMatchLenCost = 4;
|
||||
}
|
||||
else {
|
||||
if (k == (MATCH_RUN_LEN_V2 + 15 + MIN_MATCH_SIZE_V2))
|
||||
nMatchLenCost = 4 + 8;
|
||||
else {
|
||||
if (k == 256)
|
||||
nMatchLenCost = 4 + 24;
|
||||
}
|
||||
}
|
||||
|
||||
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_ARRIVAL_SHIFT];
|
||||
int nInsertedNoRepMatchCandidate = 0;
|
||||
|
||||
@@ -393,7 +415,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
|
||||
if (nCodingChoiceCost <= pDestSlots[nMatchesPerArrival - 1].cost) {
|
||||
int exists = 0;
|
||||
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3;
|
||||
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3 + nScorePenalty;
|
||||
|
||||
for (n = 0;
|
||||
n < nMatchesPerArrival && pDestSlots[n].cost <= nCodingChoiceCost;
|
||||
@@ -501,9 +523,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || match[m + 1].length < LCP_MAX))
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
|
||||
|
@@ -40,7 +40,7 @@ extern "C" {
|
||||
#endif
|
||||
|
||||
#define LCP_BITS 14
|
||||
#define TAG_BITS 3
|
||||
#define TAG_BITS 4
|
||||
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
|
||||
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
|
||||
#define LCP_SHIFT (31-LCP_BITS)
|
||||
@@ -50,7 +50,7 @@ extern "C" {
|
||||
#define EXCL_VISITED_MASK 0x7fffffff
|
||||
|
||||
#define NMATCHES_PER_ARRIVAL_SMALL 8
|
||||
#define NMATCHES_PER_ARRIVAL_BIG 23
|
||||
#define NMATCHES_PER_ARRIVAL_BIG 32
|
||||
#define MATCHES_PER_ARRIVAL_SHIFT 5
|
||||
|
||||
#define NMATCHES_PER_INDEX_V1 8
|
||||
|
Loading…
Reference in New Issue
Block a user