diff --git a/src/shrink_block_v1.c b/src/shrink_block_v1.c index 135758d..a52c991 100644 --- a/src/shrink_block_v1.c +++ b/src/shrink_block_v1.c @@ -157,7 +157,7 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf * @param nReduce non-zero to reduce the number of tokens when the path costs are equal, zero not to */ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) { - lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); + lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1); const int nMinMatchSize = pCompressor->min_match_size; const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0; const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY; @@ -166,22 +166,22 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return; - for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) { + for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V1); i += NARRIVALS_PER_POSITION_V1) { lzsa_arrival* cur_arrival = &arrival[i]; int j; - memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V2_MAX); + memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V1); - for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX; j++) + for (j = 0; j < NARRIVALS_PER_POSITION_V1; j++) cur_arrival[j].cost = 0x40000000; } - arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0; - arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1; + arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].cost = 0; + arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].from_slot = -1; for (i = nStartOffset; i != nEndOffset; i++) { - lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT]; - lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT]; + lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1]; + lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT_V1]; int j, m; for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) { @@ -208,7 +208,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p pDestArrival->cost = nCodingChoiceCost; pDestArrival->rep_offset = cur_arrival[j].rep_offset; pDestArrival->from_slot = j + 1; - pDestArrival->from_pos = i; + pDestArrival->from_pos = i - nStartOffset; pDestArrival->match_len = 0; pDestArrival->num_literals = nNumLiterals; pDestArrival->score = nScore; @@ -234,7 +234,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p nStartingMatchLen = nMinMatchSize; for (k = nStartingMatchLen; k <= nMatchLen; k++) { const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1); - lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT]; + lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT_V1]; for (j = 0; j < nNumArrivalsForThisPos; j++) { const int nPrevCost = cur_arrival[j].cost; @@ -274,7 +274,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p pDestArrival->cost = nCodingChoiceCost; pDestArrival->rep_offset = match[m].offset; pDestArrival->from_slot = j + 1; - pDestArrival->from_pos = i; + pDestArrival->from_pos = i - nStartOffset; pDestArrival->match_len = k; pDestArrival->num_literals = 0; pDestArrival->score = nScore; @@ -289,16 +289,12 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p } } - const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0]; + const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT_V1) + 0]; - while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && end_arrival->from_pos < nEndOffset) { - pBestMatch[end_arrival->from_pos].length = end_arrival->match_len; - if (end_arrival->match_len) - pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset; - else - pBestMatch[end_arrival->from_pos].offset = 0; - - end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)]; + while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) { + pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len; + pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset: 0; + end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V1) + (end_arrival->from_slot - 1)]; } } diff --git a/src/shrink_block_v2.c b/src/shrink_block_v2.c index a0a1020..e0f5ded 100644 --- a/src/shrink_block_v2.c +++ b/src/shrink_block_v2.c @@ -192,7 +192,7 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf * @param nDepth current insertion depth */ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nDepth) { - const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT); + const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2); const int *rle_len = (int*)pCompressor->intervals /* reuse */; lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */; int j; @@ -285,7 +285,7 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns * @param nArrivalsPerPosition number of arrivals to record per input buffer position */ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) { - lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); + lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2); const int *rle_len = (const int*)pCompressor->intervals /* reuse */; lzsa_match *visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */; char *nRepSlotHandledMask = pCompressor->rep_slot_handled_mask; @@ -299,7 +299,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return; - for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) { + for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V2); i += NARRIVALS_PER_POSITION_V2_MAX) { lzsa_arrival *cur_arrival = &arrival[i]; int j; @@ -309,15 +309,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne cur_arrival[j].cost = 0x40000000; } - arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0; - arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1; + arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2].cost = 0; + arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2].from_slot = -1; if (nInsertForwardReps) { memset(visited + nStartOffset, 0, (nEndOffset - nStartOffset) * sizeof(lzsa_match)); } for (i = nStartOffset; i != nEndOffset; i++) { - lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT]; + lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V2]; lzsa_arrival *pDestLiteralSlots = &cur_arrival[NARRIVALS_PER_POSITION_V2_MAX]; int j, m; @@ -400,11 +400,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne pDestArrival->cost = nCodingChoiceCost; pDestArrival->rep_offset = nRepOffset; pDestArrival->from_slot = j + 1; - pDestArrival->from_pos = i; + pDestArrival->from_pos = i - nStartOffset; pDestArrival->rep_len = cur_arrival[j].rep_len; pDestArrival->match_len = 0; - pDestArrival->rep_pos = cur_arrival[j].rep_pos; pDestArrival->num_literals = nNumLiterals; + pDestArrival->rep_pos = cur_arrival[j].rep_pos; pDestArrival->score = nScore + nDisableScore; } } @@ -522,7 +522,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } } - lzsa_arrival *pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT]; + lzsa_arrival *pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V2]; /* Insert non-repmatch candidate */ @@ -582,11 +582,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne pDestArrival->cost = nCodingChoiceCost; pDestArrival->rep_offset = nMatchOffset; pDestArrival->from_slot = nNonRepMatchArrivalIdx + 1; - pDestArrival->from_pos = i; + pDestArrival->from_pos = i - nStartOffset; pDestArrival->rep_len = k; pDestArrival->match_len = k; - pDestArrival->rep_pos = i; pDestArrival->num_literals = 0; + pDestArrival->rep_pos = i; pDestArrival->score = nNoRepmatchScore + nDisableScore; nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7)); } @@ -667,11 +667,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne pDestArrival->cost = nRepCodingChoiceCost; pDestArrival->rep_offset = nRepOffset; pDestArrival->from_slot = j + 1; - pDestArrival->from_pos = i; + pDestArrival->from_pos = i - nStartOffset; pDestArrival->rep_len = k; pDestArrival->match_len = k; - pDestArrival->rep_pos = i; pDestArrival->num_literals = 0; + pDestArrival->rep_pos = i; pDestArrival->score = nScore + nDisableScore; nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7)); } @@ -698,13 +698,13 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne } if (!nInsertForwardReps) { - const lzsa_arrival* end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0]; + const lzsa_arrival* end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT_V2) + 0]; lzsa_match* pBestMatch = pCompressor->best_match - nStartOffset; - while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && end_arrival->from_pos < nEndOffset) { - pBestMatch[end_arrival->from_pos].length = end_arrival->match_len; - pBestMatch[end_arrival->from_pos].offset = (end_arrival->match_len) ? end_arrival->rep_offset : 0; - end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)]; + while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) { + pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len; + pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset : 0; + end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2) + (end_arrival->from_slot - 1)]; } } } @@ -1451,7 +1451,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne } /* Compress optimally and do break ties in favor of less tokens */ - lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT); + lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT_V2); } /* Try to reduce final command set, wherever possible */ diff --git a/src/shrink_context.c b/src/shrink_context.c index 106abb2..32bfb16 100644 --- a/src/shrink_context.c +++ b/src/shrink_context.c @@ -94,7 +94,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int)); if (pCompressor->open_intervals) { - pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival)); + pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT_V2) * sizeof(lzsa_arrival)); if (pCompressor->arrival) { pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match)); diff --git a/src/shrink_context.h b/src/shrink_context.h index 13537fc..81478b4 100644 --- a/src/shrink_context.h +++ b/src/shrink_context.h @@ -53,7 +53,8 @@ extern "C" { #define NARRIVALS_PER_POSITION_V2_SMALL 9 #define NARRIVALS_PER_POSITION_V2_BIG 32 #define NARRIVALS_PER_POSITION_V2_MAX 64 -#define ARRIVALS_PER_POSITION_SHIFT 6 +#define ARRIVALS_PER_POSITION_SHIFT_V1 3 +#define ARRIVALS_PER_POSITION_SHIFT_V2 6 #define NMATCHES_PER_INDEX_V1 16 #define MATCHES_PER_INDEX_SHIFT_V1 4 @@ -78,11 +79,11 @@ typedef struct _lzsa_arrival { unsigned short rep_offset; short from_slot; - int from_pos; + unsigned short from_pos; unsigned short rep_len; unsigned short match_len; + unsigned short num_literals; int rep_pos; - int num_literals; int score; } lzsa_arrival;