Compress LZSA2 a little bit faster

This commit is contained in:
Emmanuel Marty 2020-12-16 10:03:18 +01:00 committed by GitHub
parent 71493cf889
commit 6bebfb75bd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 36 additions and 20 deletions

View File

@ -271,7 +271,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
const int *rle_end = (int*)pCompressor->intervals /* reuse */;
lzsa_match *visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
char *nRepLenHandledMask = pCompressor->rep_handled_mask;
char *nRepSlotHandledMask = pCompressor->rep_slot_handled_mask;
char *nRepLenHandledMask = pCompressor->rep_len_handled_mask;
const int nModeSwitchPenalty = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 0 : MODESWITCH_PENALTY;
const int nMinMatchSize = pCompressor->min_match_size;
const int nDisableScore = nReduce ? 0 : (2 * BLOCK_SIZE);
@ -300,6 +301,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
for (j = 0; j < nArrivalsPerPosition && cur_arrival[j].from_slot; j++) {
const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
int nCodingChoiceCost = nPrevCost + 8 /* literal */;
int nScore = cur_arrival[j].score + 1;
int nNumLiterals = cur_arrival[j].num_literals + 1;
if (nNumLiterals == LITERALS_RUN_LEN_V2) {
@ -316,7 +318,8 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
nCodingChoiceCost += nModeSwitchPenalty;
lzsa_arrival *pDestSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT];
if (nCodingChoiceCost <= pDestSlots[nArrivalsPerPosition - 1].cost) {
if (nCodingChoiceCost < pDestSlots[nArrivalsPerPosition - 1].cost ||
(nCodingChoiceCost == pDestSlots[nArrivalsPerPosition - 1].cost && nScore < (pDestSlots[nArrivalsPerPosition - 1].score + nDisableScore))) {
int nRepOffset = cur_arrival[j].rep_offset;
int exists = 0;
@ -330,8 +333,6 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
}
if (!exists) {
int nScore = cur_arrival[j].score + 1;
for (;
n < nArrivalsPerPosition && pDestSlots[n].cost == nCodingChoiceCost && nScore >= (pDestSlots[n].score + nDisableScore);
n++) {
@ -427,8 +428,9 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
}
if (!nReduce) {
memset(nRepLenHandledMask, 0, nArrivalsPerPosition * ((LCP_MAX + 1) / 8) * sizeof(char));
memset(nRepSlotHandledMask, 0, nArrivalsPerPosition * ((LCP_MAX + 1) / 8) * sizeof(char));
}
memset(nRepLenHandledMask, 0, ((LCP_MAX + 1) / 8) * sizeof(char));
for (m = 0; m < NMATCHES_PER_INDEX_V2 && match[m].length; m++) {
int nMatchLen = match[m].length & 0x7fff;
@ -551,6 +553,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
pDestArrival->rep_offset = nMatchOffset;
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~(1 << (k & 7));
}
}
}
@ -560,10 +563,12 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
/* Insert repmatch candidates */
if (k > nMinOverallRepLen && k <= nMaxOverallRepLen) {
if (k > nMinOverallRepLen && k <= nMaxOverallRepLen && (nRepLenHandledMask[k >> 3] & (1 << (k & 7))) == 0) {
nRepLenHandledMask[k >> 3] |= 1 << (k & 7);
for (j = 0; j < nNumArrivalsForThisPos; j++) {
int nMaskOffset = (j << 7) + (k >> 3);
if (nRepLenForArrival[j] >= k && (nReduce || !(nRepLenHandledMask[nMaskOffset] & (1 << (k & 7))))) {
if (nRepLenForArrival[j] >= k && (nReduce || !(nRepSlotHandledMask[nMaskOffset] & (1 << (k & 7))))) {
const int nPrevCost = cur_arrival[j].cost & 0x3fffffff;
int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost;
int nScore = cur_arrival[j].score + 2;
@ -579,7 +584,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
if (pDestSlots[n].rep_offset == nRepOffset) {
exists = 1;
if (!nReduce)
nRepLenHandledMask[nMaskOffset] |= 1 << (k & 7);
nRepSlotHandledMask[nMaskOffset] |= 1 << (k & 7);
break;
}
}
@ -629,6 +634,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
pDestArrival->rep_offset = nRepOffset;
pDestArrival->rep_pos = i;
pDestArrival->rep_len = k;
nRepLenHandledMask[k >> 3] &= ~(1 << (k & 7));
}
}
}

View File

@ -62,7 +62,8 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->best_match = NULL;
pCompressor->improved_match = NULL;
pCompressor->arrival = NULL;
pCompressor->rep_handled_mask = NULL;
pCompressor->rep_slot_handled_mask = NULL;
pCompressor->rep_len_handled_mask = NULL;
pCompressor->first_offset_for_byte = NULL;
pCompressor->next_offset_for_pos = NULL;
pCompressor->min_match_size = nMinMatchSize;
@ -107,13 +108,16 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
pCompressor->match = (lzsa_match *)malloc(BLOCK_SIZE * NMATCHES_PER_INDEX_V1 * sizeof(lzsa_match));
if (pCompressor->match) {
if (pCompressor->format_version == 2) {
pCompressor->rep_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
return 0;
pCompressor->rep_slot_handled_mask = (char*)malloc(NARRIVALS_PER_POSITION_V2_BIG * ((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_slot_handled_mask) {
pCompressor->rep_len_handled_mask = (char*)malloc(((LCP_MAX + 1) / 8) * sizeof(char));
if (pCompressor->rep_len_handled_mask) {
pCompressor->first_offset_for_byte = (int*)malloc(65536 * sizeof(int));
if (pCompressor->first_offset_for_byte) {
pCompressor->next_offset_for_pos = (int*)malloc(BLOCK_SIZE * sizeof(int));
if (pCompressor->next_offset_for_pos) {
return 0;
}
}
}
}
@ -152,9 +156,14 @@ void lzsa_compressor_destroy(lzsa_compressor *pCompressor) {
pCompressor->first_offset_for_byte = NULL;
}
if (pCompressor->rep_handled_mask) {
free(pCompressor->rep_handled_mask);
pCompressor->rep_handled_mask = NULL;
if (pCompressor->rep_len_handled_mask) {
free(pCompressor->rep_len_handled_mask);
pCompressor->rep_len_handled_mask = NULL;
}
if (pCompressor->rep_slot_handled_mask) {
free(pCompressor->rep_slot_handled_mask);
pCompressor->rep_slot_handled_mask = NULL;
}
if (pCompressor->match) {

View File

@ -124,7 +124,8 @@ typedef struct _lzsa_compressor {
lzsa_match *best_match;
lzsa_match *improved_match;
lzsa_arrival *arrival;
char *rep_handled_mask;
char *rep_slot_handled_mask;
char *rep_len_handled_mask;
int *first_offset_for_byte;
int *next_offset_for_pos;
int min_match_size;