Increase compression ratio

This commit is contained in:
Emmanuel Marty 2019-12-09 09:54:56 +01:00 committed by GitHub
parent 65a262ec95
commit 4c566286f5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 56 additions and 14 deletions

View File

@ -195,10 +195,11 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* @param nOffset offset to find matches at, in the input window * @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches * @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none) * @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
* *
* @return number of matches * @return number of matches
*/ */
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches) { int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize) {
unsigned int *intervals = pCompressor->intervals; unsigned int *intervals = pCompressor->intervals;
unsigned int *pos_data = pCompressor->pos_data; unsigned int *pos_data = pCompressor->pos_data;
unsigned int ref; unsigned int ref;
@ -238,6 +239,24 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
/* Ascend indirectly via pos_data[] links. */ /* Ascend indirectly via pos_data[] links. */
match_pos = super_ref & EXCL_VISITED_MASK; match_pos = super_ref & EXCL_VISITED_MASK;
matchptr = pMatches; matchptr = pMatches;
if (pCompressor->format_version >= 2 && nInWindowSize < 65536) {
if ((matchptr - pMatches) < nMaxMatches) {
int nMatchOffset = (int)(nOffset - match_pos);
if (nMatchOffset <= MAX_OFFSET) {
if (pCompressor->format_version >= 2) {
matchptr->length = (unsigned short)(ref >> (LCP_SHIFT + TAG_BITS));
}
else {
matchptr->length = (unsigned short)(ref >> LCP_SHIFT);
}
matchptr->offset = (unsigned short)nMatchOffset;
matchptr++;
}
}
}
for (;;) { for (;;) {
while ((super_ref = pos_data[match_pos]) > ref) while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK; match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
@ -282,7 +301,7 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con
/* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However, /* Skipping still requires scanning for matches, as this also performs a lazy update of the intervals. However,
* we don't store the matches. */ * we don't store the matches. */
for (i = nStartOffset; i < nEndOffset; i++) { for (i = nStartOffset; i < nEndOffset; i++) {
lzsa_find_matches_at(pCompressor, i, &match, 0); lzsa_find_matches_at(pCompressor, i, &match, 0, 0);
} }
} }
@ -299,7 +318,7 @@ void lzsa_find_all_matches(lzsa_compressor *pCompressor, const int nMatchesPerOf
int i; int i;
for (i = nStartOffset; i < nEndOffset; i++) { for (i = nStartOffset; i < nEndOffset; i++) {
int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset); int nMatches = lzsa_find_matches_at(pCompressor, i, pMatch, nMatchesPerOffset, nEndOffset - nStartOffset);
while (nMatches < nMatchesPerOffset) { while (nMatches < nMatchesPerOffset) {
pMatch[nMatches].length = 0; pMatch[nMatches].length = 0;

View File

@ -59,10 +59,11 @@ int lzsa_build_suffix_array(lzsa_compressor *pCompressor, const unsigned char *p
* @param nOffset offset to find matches at, in the input window * @param nOffset offset to find matches at, in the input window
* @param pMatches pointer to returned matches * @param pMatches pointer to returned matches
* @param nMaxMatches maximum number of matches to return (0 for none) * @param nMaxMatches maximum number of matches to return (0 for none)
* @param nInWindowSize total input size in bytes (previously compressed bytes + bytes to compress)
* *
* @return number of matches * @return number of matches
*/ */
int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches); int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_match *pMatches, const int nMaxMatches, const int nInWindowSize);
/** /**
* Skip previously compressed bytes * Skip previously compressed bytes

View File

@ -303,9 +303,16 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) { (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
if (pDestArrival->from_slot) { if (pDestArrival->from_slot) {
int z;
for (z = n; z < nMatchesPerArrival - 1; z++) {
if (pDestSlots[z].rep_offset == arrival[(i << MATCHES_PER_ARRIVAL_SHIFT) + j].rep_offset)
break;
}
memmove(&pDestSlots[n + 1], memmove(&pDestSlots[n + 1],
&pDestSlots[n], &pDestSlots[n],
sizeof(lzsa_arrival) * (nMatchesPerArrival - n - 1)); sizeof(lzsa_arrival) * (z - n));
} }
pDestArrival->cost = nCodingChoiceCost; pDestArrival->cost = nCodingChoiceCost;
@ -400,15 +407,25 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
} }
if (!exists) { if (!exists) {
for (n = 0; n < nMatchesPerArrival; n++) { for (n = 0; n < nMatchesPerArrival - 1; n++) {
lzsa_arrival *pDestArrival = &pDestSlots[n]; lzsa_arrival *pDestArrival = &pDestSlots[n];
if (nCodingChoiceCost < pDestArrival->cost || if (nCodingChoiceCost < pDestArrival->cost ||
(nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) { (nCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
if (pDestArrival->from_slot) { if (pDestArrival->from_slot) {
int z;
for (z = n; z < nMatchesPerArrival - 1; z++) {
if (pDestSlots[z].rep_offset == nMatchOffset)
break;
}
if (z == (nMatchesPerArrival - 1) && pDestSlots[z].from_slot && pDestSlots[z].match_len < MIN_MATCH_SIZE_V2)
z--;
memmove(&pDestSlots[n + 1], memmove(&pDestSlots[n + 1],
&pDestSlots[n], &pDestSlots[n],
sizeof(lzsa_arrival) * (nMatchesPerArrival - n - 1)); sizeof(lzsa_arrival) * (z - n));
} }
pDestArrival->cost = nCodingChoiceCost; pDestArrival->cost = nCodingChoiceCost;
@ -456,9 +473,16 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (nRepCodingChoiceCost < pDestArrival->cost || if (nRepCodingChoiceCost < pDestArrival->cost ||
(nRepCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) { (nRepCodingChoiceCost == pDestArrival->cost && nScore < (pDestArrival->score + nDisableScore))) {
if (pDestArrival->from_slot) { if (pDestArrival->from_slot) {
int z;
for (z = n; z < nMatchesPerArrival - 1; z++) {
if (pDestSlots[z].rep_offset == nRepOffset)
break;
}
memmove(&pDestSlots[n + 1], memmove(&pDestSlots[n + 1],
&pDestSlots[n], &pDestSlots[n],
sizeof(lzsa_arrival) * (nMatchesPerArrival - n - 1)); sizeof(lzsa_arrival) * (z - n));
} }
pDestArrival->cost = nRepCodingChoiceCost; pDestArrival->cost = nRepCodingChoiceCost;
@ -1078,8 +1102,6 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match)); memset(pCompressor->best_match, 0, BLOCK_SIZE * sizeof(lzsa_match));
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nMatchesPerArrival); lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, (nInDataSize < 65536) ? 1 : 0 /* insert forward reps */, nMatchesPerArrival);
if (nInDataSize < 65536)
lzsa_optimize_forward_v2(pCompressor, pInWindow, pCompressor->best_match - nPreviousBlockSize, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 0 /* reduce */, 1 /* insert forward reps */, nMatchesPerArrival);
int nDidReduce; int nDidReduce;
int nPasses = 0; int nPasses = 0;

View File

@ -89,7 +89,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int)); pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
if (pCompressor->open_intervals) { if (pCompressor->open_intervals) {
pCompressor->arrival = (lzsa_arrival *)malloc((BLOCK_SIZE + 1) * NMATCHES_PER_ARRIVAL_BIG * sizeof(lzsa_arrival)); pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << MATCHES_PER_ARRIVAL_SHIFT) * sizeof(lzsa_arrival));
if (pCompressor->arrival) { if (pCompressor->arrival) {
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match)); pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));

View File

@ -50,8 +50,8 @@ extern "C" {
#define EXCL_VISITED_MASK 0x7fffffff #define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_ARRIVAL_SMALL 8 #define NMATCHES_PER_ARRIVAL_SMALL 8
#define NMATCHES_PER_ARRIVAL_BIG 16 #define NMATCHES_PER_ARRIVAL_BIG 23
#define MATCHES_PER_ARRIVAL_SHIFT 4 #define MATCHES_PER_ARRIVAL_SHIFT 5
#define NMATCHES_PER_INDEX_V1 8 #define NMATCHES_PER_INDEX_V1 8
#define MATCHES_PER_INDEX_SHIFT_V1 3 #define MATCHES_PER_INDEX_SHIFT_V1 3
@ -59,7 +59,7 @@ extern "C" {
#define NMATCHES_PER_INDEX_V2 64 #define NMATCHES_PER_INDEX_V2 64
#define MATCHES_PER_INDEX_SHIFT_V2 6 #define MATCHES_PER_INDEX_SHIFT_V2 6
#define LEAVE_ALONE_MATCH_SIZE 1000 #define LEAVE_ALONE_MATCH_SIZE 300
#define LAST_LITERALS 0 #define LAST_LITERALS 0