mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2024-11-29 20:49:25 +00:00
Ratio increase
This commit is contained in:
parent
fc5f540a68
commit
e9ca5032bc
@ -91,7 +91,7 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
|
|||||||
int nMinMatchSize = pCompressor->min_match_size;
|
int nMinMatchSize = pCompressor->min_match_size;
|
||||||
|
|
||||||
if (pCompressor->format_version >= 2) {
|
if (pCompressor->format_version >= 2) {
|
||||||
for (i = 1; i < nInWindowSize - 1; i++) {
|
for (i = 1; i < nInWindowSize; i++) {
|
||||||
int nIndex = (int)(intervals[i] & POS_MASK);
|
int nIndex = (int)(intervals[i] & POS_MASK);
|
||||||
int nLen = PLCP[nIndex];
|
int nLen = PLCP[nIndex];
|
||||||
if (nLen < nMinMatchSize)
|
if (nLen < nMinMatchSize)
|
||||||
@ -206,6 +206,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
|||||||
unsigned int super_ref;
|
unsigned int super_ref;
|
||||||
unsigned int match_pos;
|
unsigned int match_pos;
|
||||||
lzsa_match *matchptr;
|
lzsa_match *matchptr;
|
||||||
|
int nPrevOffset = 0;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find matches using intervals
|
* Find matches using intervals
|
||||||
@ -245,19 +246,34 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
|||||||
int nMatchOffset = (int)(nOffset - match_pos);
|
int nMatchOffset = (int)(nOffset - match_pos);
|
||||||
|
|
||||||
if (nMatchOffset <= MAX_OFFSET) {
|
if (nMatchOffset <= MAX_OFFSET) {
|
||||||
if (pCompressor->format_version >= 2) {
|
|
||||||
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
||||||
}
|
|
||||||
else {
|
|
||||||
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
|
|
||||||
}
|
|
||||||
matchptr->offset = (unsigned short)nMatchOffset;
|
matchptr->offset = (unsigned short)nMatchOffset;
|
||||||
matchptr++;
|
matchptr++;
|
||||||
|
|
||||||
|
nPrevOffset = nMatchOffset;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
|
if ((super_ref = pos_data[match_pos]) > ref) {
|
||||||
|
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||||
|
|
||||||
|
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
|
||||||
|
if ((matchptr - pMatches) < nMaxMatches) {
|
||||||
|
int nMatchOffset = (int)(nOffset - match_pos);
|
||||||
|
|
||||||
|
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||||
|
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
||||||
|
matchptr->offset = (unsigned short)nMatchOffset;
|
||||||
|
matchptr++;
|
||||||
|
|
||||||
|
nPrevOffset = nMatchOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
while ((super_ref = pos_data[match_pos]) > ref)
|
while ((super_ref = pos_data[match_pos]) > ref)
|
||||||
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
|
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||||
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
|
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
|
||||||
@ -266,7 +282,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
|||||||
if ((matchptr - pMatches) < nMaxMatches) {
|
if ((matchptr - pMatches) < nMaxMatches) {
|
||||||
int nMatchOffset = (int)(nOffset - match_pos);
|
int nMatchOffset = (int)(nOffset - match_pos);
|
||||||
|
|
||||||
if (nMatchOffset <= MAX_OFFSET) {
|
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||||
if (pCompressor->format_version >= 2) {
|
if (pCompressor->format_version >= 2) {
|
||||||
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
|
||||||
}
|
}
|
||||||
@ -282,6 +298,23 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
|
|||||||
break;
|
break;
|
||||||
ref = super_ref;
|
ref = super_ref;
|
||||||
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
|
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
|
||||||
|
|
||||||
|
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
|
||||||
|
if ((matchptr - pMatches) < nMaxMatches) {
|
||||||
|
int nMatchOffset = (int)(nOffset - match_pos);
|
||||||
|
|
||||||
|
if (nMatchOffset <= MAX_OFFSET && nMatchOffset != nPrevOffset) {
|
||||||
|
matchptr->length = ((unsigned short)(ref >> (LCP_SHIFT + TAG_BITS))) | 0x8000;
|
||||||
|
matchptr->offset = (unsigned short)nMatchOffset;
|
||||||
|
|
||||||
|
if ((matchptr->length & 0x7fff) > 2) {
|
||||||
|
matchptr++;
|
||||||
|
|
||||||
|
nPrevOffset = nMatchOffset;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return (int)(matchptr - pMatches);
|
return (int)(matchptr - pMatches);
|
||||||
|
@ -334,9 +334,13 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
|
|
||||||
lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
|
lzsa_match *match = pCompressor->match + ((i - nStartOffset) << MATCHES_PER_INDEX_SHIFT_V2);
|
||||||
|
|
||||||
|
int nMinRepLen[NMATCHES_PER_ARRIVAL_BIG];
|
||||||
|
memset(nMinRepLen, 0, nMatchesPerArrival * sizeof(int));
|
||||||
|
|
||||||
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
|
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
|
||||||
int nMatchLen = match[m].length;
|
int nMatchLen = match[m].length & 0x7fff;
|
||||||
int nMatchOffset = match[m].offset;
|
int nMatchOffset = match[m].offset;
|
||||||
|
int nScorePenalty = ((match[m].length & 0x8000) >> 15);
|
||||||
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
|
int nNoRepmatchOffsetCost = (nMatchOffset <= 32) ? 4 : ((nMatchOffset <= 512) ? 8 : ((nMatchOffset <= (8192 + 512)) ? 12 : 16));
|
||||||
int nStartingMatchLen, k;
|
int nStartingMatchLen, k;
|
||||||
int nMaxRepLen[NMATCHES_PER_ARRIVAL_BIG];
|
int nMaxRepLen[NMATCHES_PER_ARRIVAL_BIG];
|
||||||
@ -354,8 +358,10 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
else {
|
else {
|
||||||
if (i > nRepOffset &&
|
if (i > nRepOffset &&
|
||||||
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
|
(i - nRepOffset + nMatchLen) <= (nEndOffset - LAST_LITERALS)) {
|
||||||
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i - nMatchOffset + nCurMaxRepLen])
|
nCurMaxRepLen = nMinRepLen[j];
|
||||||
|
while (nCurMaxRepLen < nMatchLen && pInWindow[i - nRepOffset + nCurMaxRepLen] == pInWindow[i + nCurMaxRepLen])
|
||||||
nCurMaxRepLen++;
|
nCurMaxRepLen++;
|
||||||
|
nMinRepLen[j] = nCurMaxRepLen;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -368,13 +374,29 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
if (nInsertForwardReps)
|
if (nInsertForwardReps)
|
||||||
lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nMatchesPerArrival, 0);
|
lzsa_insert_forward_match_v2(pCompressor, pInWindow, i, nMatchOffset, nStartOffset, nEndOffset, nMatchesPerArrival, 0);
|
||||||
|
|
||||||
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE)
|
int nMatchLenCost = 0;
|
||||||
|
if (nMatchLen >= LEAVE_ALONE_MATCH_SIZE) {
|
||||||
nStartingMatchLen = nMatchLen;
|
nStartingMatchLen = nMatchLen;
|
||||||
else
|
nMatchLenCost = 4 + 24;
|
||||||
|
}
|
||||||
|
else {
|
||||||
nStartingMatchLen = nMinMatchSize;
|
nStartingMatchLen = nMinMatchSize;
|
||||||
|
nMatchLenCost = 0;
|
||||||
|
}
|
||||||
|
|
||||||
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
||||||
int nMatchLenCost = lzsa_get_match_varlen_size_v2(k - MIN_MATCH_SIZE_V2);
|
if (k == (MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2)) {
|
||||||
|
nMatchLenCost = 4;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
if (k == (MATCH_RUN_LEN_V2 + 15 + MIN_MATCH_SIZE_V2))
|
||||||
|
nMatchLenCost = 4 + 8;
|
||||||
|
else {
|
||||||
|
if (k == 256)
|
||||||
|
nMatchLenCost = 4 + 24;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_ARRIVAL_SHIFT];
|
lzsa_arrival *pDestSlots = &arrival[(i + k) << MATCHES_PER_ARRIVAL_SHIFT];
|
||||||
int nInsertedNoRepMatchCandidate = 0;
|
int nInsertedNoRepMatchCandidate = 0;
|
||||||
|
|
||||||
@ -393,7 +415,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
|
|
||||||
if (nCodingChoiceCost <= pDestSlots[nMatchesPerArrival - 1].cost) {
|
if (nCodingChoiceCost <= pDestSlots[nMatchesPerArrival - 1].cost) {
|
||||||
int exists = 0;
|
int exists = 0;
|
||||||
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3;
|
int nScore = arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].score + 3 + nScorePenalty;
|
||||||
|
|
||||||
for (n = 0;
|
for (n = 0;
|
||||||
n < nMatchesPerArrival && pDestSlots[n].cost <= nCodingChoiceCost;
|
n < nMatchesPerArrival && pDestSlots[n].cost <= nCodingChoiceCost;
|
||||||
@ -501,9 +523,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (nMatchLen >= LCP_MAX && ((m + 1) >= NMATCHES_PER_INDEX_V2 || match[m + 1].length < LCP_MAX))
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
|
lzsa_arrival *end_arrival = &arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + 0];
|
||||||
|
@ -40,7 +40,7 @@ extern "C" {
|
|||||||
#endif
|
#endif
|
||||||
|
|
||||||
#define LCP_BITS 14
|
#define LCP_BITS 14
|
||||||
#define TAG_BITS 3
|
#define TAG_BITS 4
|
||||||
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
|
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
|
||||||
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
|
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
|
||||||
#define LCP_SHIFT (31-LCP_BITS)
|
#define LCP_SHIFT (31-LCP_BITS)
|
||||||
@ -50,7 +50,7 @@ extern "C" {
|
|||||||
#define EXCL_VISITED_MASK 0x7fffffff
|
#define EXCL_VISITED_MASK 0x7fffffff
|
||||||
|
|
||||||
#define NMATCHES_PER_ARRIVAL_SMALL 8
|
#define NMATCHES_PER_ARRIVAL_SMALL 8
|
||||||
#define NMATCHES_PER_ARRIVAL_BIG 23
|
#define NMATCHES_PER_ARRIVAL_BIG 32
|
||||||
#define MATCHES_PER_ARRIVAL_SHIFT 5
|
#define MATCHES_PER_ARRIVAL_SHIFT 5
|
||||||
|
|
||||||
#define NMATCHES_PER_INDEX_V1 8
|
#define NMATCHES_PER_INDEX_V1 8
|
||||||
|
Loading…
Reference in New Issue
Block a user