mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2024-12-27 02:32:44 +00:00
Compress LZSA1 another 35% faster
This commit is contained in:
parent
613f3ef0d7
commit
eeec526eeb
@ -157,7 +157,7 @@ static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOf
|
||||
* @param nReduce non-zero to reduce the number of tokens when the path costs are equal, zero not to
|
||||
*/
|
||||
static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset, const int nReduce) {
|
||||
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
|
||||
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1);
|
||||
const int nMinMatchSize = pCompressor->min_match_size;
|
||||
const int nFavorRatio = (pCompressor->flags & LZSA_FLAG_FAVOR_RATIO) ? 1 : 0;
|
||||
const int nModeSwitchPenalty = nFavorRatio ? 0 : MODESWITCH_PENALTY;
|
||||
@ -166,22 +166,22 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
|
||||
|
||||
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
|
||||
|
||||
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) {
|
||||
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V1); i += NARRIVALS_PER_POSITION_V1) {
|
||||
lzsa_arrival* cur_arrival = &arrival[i];
|
||||
int j;
|
||||
|
||||
memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V2_MAX);
|
||||
memset(cur_arrival, 0, sizeof(lzsa_arrival) * NARRIVALS_PER_POSITION_V1);
|
||||
|
||||
for (j = 0; j < NARRIVALS_PER_POSITION_V2_MAX; j++)
|
||||
for (j = 0; j < NARRIVALS_PER_POSITION_V1; j++)
|
||||
cur_arrival[j].cost = 0x40000000;
|
||||
}
|
||||
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].cost = 0;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V1].from_slot = -1;
|
||||
|
||||
for (i = nStartOffset; i != nEndOffset; i++) {
|
||||
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
|
||||
lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT];
|
||||
lzsa_arrival* cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V1];
|
||||
lzsa_arrival* pDestLiteralSlots = &cur_arrival[1 << ARRIVALS_PER_POSITION_SHIFT_V1];
|
||||
int j, m;
|
||||
|
||||
for (j = 0; j < NARRIVALS_PER_POSITION_V1 && cur_arrival[j].from_slot; j++) {
|
||||
@ -208,7 +208,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
|
||||
pDestArrival->cost = nCodingChoiceCost;
|
||||
pDestArrival->rep_offset = cur_arrival[j].rep_offset;
|
||||
pDestArrival->from_slot = j + 1;
|
||||
pDestArrival->from_pos = i;
|
||||
pDestArrival->from_pos = i - nStartOffset;
|
||||
pDestArrival->match_len = 0;
|
||||
pDestArrival->num_literals = nNumLiterals;
|
||||
pDestArrival->score = nScore;
|
||||
@ -234,7 +234,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
|
||||
nStartingMatchLen = nMinMatchSize;
|
||||
for (k = nStartingMatchLen; k <= nMatchLen; k++) {
|
||||
const int nMatchLenCost = lzsa_get_match_varlen_size_v1(k - MIN_MATCH_SIZE_V1);
|
||||
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT];
|
||||
lzsa_arrival *pDestSlots = &arrival[(i + k) << ARRIVALS_PER_POSITION_SHIFT_V1];
|
||||
|
||||
for (j = 0; j < nNumArrivalsForThisPos; j++) {
|
||||
const int nPrevCost = cur_arrival[j].cost;
|
||||
@ -274,7 +274,7 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
|
||||
pDestArrival->cost = nCodingChoiceCost;
|
||||
pDestArrival->rep_offset = match[m].offset;
|
||||
pDestArrival->from_slot = j + 1;
|
||||
pDestArrival->from_pos = i;
|
||||
pDestArrival->from_pos = i - nStartOffset;
|
||||
pDestArrival->match_len = k;
|
||||
pDestArrival->num_literals = 0;
|
||||
pDestArrival->score = nScore;
|
||||
@ -289,16 +289,12 @@ static void lzsa_optimize_forward_v1(lzsa_compressor *pCompressor, lzsa_match *p
|
||||
}
|
||||
}
|
||||
|
||||
const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
|
||||
const lzsa_arrival *end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT_V1) + 0];
|
||||
|
||||
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && end_arrival->from_pos < nEndOffset) {
|
||||
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
|
||||
if (end_arrival->match_len)
|
||||
pBestMatch[end_arrival->from_pos].offset = end_arrival->rep_offset;
|
||||
else
|
||||
pBestMatch[end_arrival->from_pos].offset = 0;
|
||||
|
||||
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
|
||||
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) {
|
||||
pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len;
|
||||
pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset: 0;
|
||||
end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V1) + (end_arrival->from_slot - 1)];
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -192,7 +192,7 @@ static inline int lzsa_write_match_varlen_v2(unsigned char *pOutData, int nOutOf
|
||||
* @param nDepth current insertion depth
|
||||
*/
|
||||
static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int i, const int nMatchOffset, const int nStartOffset, const int nEndOffset, const int nDepth) {
|
||||
const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT);
|
||||
const lzsa_arrival *arrival = pCompressor->arrival + ((i - nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2);
|
||||
const int *rle_len = (int*)pCompressor->intervals /* reuse */;
|
||||
lzsa_match* visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
|
||||
int j;
|
||||
@ -285,7 +285,7 @@ static void lzsa_insert_forward_match_v2(lzsa_compressor *pCompressor, const uns
|
||||
* @param nArrivalsPerPosition number of arrivals to record per input buffer position
|
||||
*/
|
||||
static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, const int nReduce, const int nInsertForwardReps, const int nArrivalsPerPosition) {
|
||||
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT);
|
||||
lzsa_arrival *arrival = pCompressor->arrival - (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2);
|
||||
const int *rle_len = (const int*)pCompressor->intervals /* reuse */;
|
||||
lzsa_match *visited = ((lzsa_match*)pCompressor->pos_data) - nStartOffset /* reuse */;
|
||||
char *nRepSlotHandledMask = pCompressor->rep_slot_handled_mask;
|
||||
@ -299,7 +299,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
|
||||
if ((nEndOffset - nStartOffset) > BLOCK_SIZE) return;
|
||||
|
||||
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT); i += NARRIVALS_PER_POSITION_V2_MAX) {
|
||||
for (i = (nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2); i != ((nEndOffset + 1) << ARRIVALS_PER_POSITION_SHIFT_V2); i += NARRIVALS_PER_POSITION_V2_MAX) {
|
||||
lzsa_arrival *cur_arrival = &arrival[i];
|
||||
int j;
|
||||
|
||||
@ -309,15 +309,15 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
cur_arrival[j].cost = 0x40000000;
|
||||
}
|
||||
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].cost = 0;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT].from_slot = -1;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2].cost = 0;
|
||||
arrival[nStartOffset << ARRIVALS_PER_POSITION_SHIFT_V2].from_slot = -1;
|
||||
|
||||
if (nInsertForwardReps) {
|
||||
memset(visited + nStartOffset, 0, (nEndOffset - nStartOffset) * sizeof(lzsa_match));
|
||||
}
|
||||
|
||||
for (i = nStartOffset; i != nEndOffset; i++) {
|
||||
lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT];
|
||||
lzsa_arrival *cur_arrival = &arrival[i << ARRIVALS_PER_POSITION_SHIFT_V2];
|
||||
lzsa_arrival *pDestLiteralSlots = &cur_arrival[NARRIVALS_PER_POSITION_V2_MAX];
|
||||
int j, m;
|
||||
|
||||
@ -400,11 +400,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
pDestArrival->cost = nCodingChoiceCost;
|
||||
pDestArrival->rep_offset = nRepOffset;
|
||||
pDestArrival->from_slot = j + 1;
|
||||
pDestArrival->from_pos = i;
|
||||
pDestArrival->from_pos = i - nStartOffset;
|
||||
pDestArrival->rep_len = cur_arrival[j].rep_len;
|
||||
pDestArrival->match_len = 0;
|
||||
pDestArrival->rep_pos = cur_arrival[j].rep_pos;
|
||||
pDestArrival->num_literals = nNumLiterals;
|
||||
pDestArrival->rep_pos = cur_arrival[j].rep_pos;
|
||||
pDestArrival->score = nScore + nDisableScore;
|
||||
}
|
||||
}
|
||||
@ -522,7 +522,7 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
}
|
||||
}
|
||||
|
||||
lzsa_arrival *pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT];
|
||||
lzsa_arrival *pDestSlots = &cur_arrival[k << ARRIVALS_PER_POSITION_SHIFT_V2];
|
||||
|
||||
/* Insert non-repmatch candidate */
|
||||
|
||||
@ -582,11 +582,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
pDestArrival->cost = nCodingChoiceCost;
|
||||
pDestArrival->rep_offset = nMatchOffset;
|
||||
pDestArrival->from_slot = nNonRepMatchArrivalIdx + 1;
|
||||
pDestArrival->from_pos = i;
|
||||
pDestArrival->from_pos = i - nStartOffset;
|
||||
pDestArrival->rep_len = k;
|
||||
pDestArrival->match_len = k;
|
||||
pDestArrival->rep_pos = i;
|
||||
pDestArrival->num_literals = 0;
|
||||
pDestArrival->rep_pos = i;
|
||||
pDestArrival->score = nNoRepmatchScore + nDisableScore;
|
||||
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
|
||||
}
|
||||
@ -667,11 +667,11 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
pDestArrival->cost = nRepCodingChoiceCost;
|
||||
pDestArrival->rep_offset = nRepOffset;
|
||||
pDestArrival->from_slot = j + 1;
|
||||
pDestArrival->from_pos = i;
|
||||
pDestArrival->from_pos = i - nStartOffset;
|
||||
pDestArrival->rep_len = k;
|
||||
pDestArrival->match_len = k;
|
||||
pDestArrival->rep_pos = i;
|
||||
pDestArrival->num_literals = 0;
|
||||
pDestArrival->rep_pos = i;
|
||||
pDestArrival->score = nScore + nDisableScore;
|
||||
nRepLenHandledMask[k >> 3] &= ~((1 ^ nReduce) << (k & 7));
|
||||
}
|
||||
@ -698,13 +698,13 @@ static void lzsa_optimize_forward_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
}
|
||||
|
||||
if (!nInsertForwardReps) {
|
||||
const lzsa_arrival* end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT) + 0];
|
||||
const lzsa_arrival* end_arrival = &arrival[(i << ARRIVALS_PER_POSITION_SHIFT_V2) + 0];
|
||||
lzsa_match* pBestMatch = pCompressor->best_match - nStartOffset;
|
||||
|
||||
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && end_arrival->from_pos < nEndOffset) {
|
||||
pBestMatch[end_arrival->from_pos].length = end_arrival->match_len;
|
||||
pBestMatch[end_arrival->from_pos].offset = (end_arrival->match_len) ? end_arrival->rep_offset : 0;
|
||||
end_arrival = &arrival[(end_arrival->from_pos << ARRIVALS_PER_POSITION_SHIFT) + (end_arrival->from_slot - 1)];
|
||||
while (end_arrival->from_slot > 0 && end_arrival->from_pos >= 0 && (end_arrival->from_pos + nStartOffset) < nEndOffset) {
|
||||
pBestMatch[end_arrival->from_pos + nStartOffset].length = end_arrival->match_len;
|
||||
pBestMatch[end_arrival->from_pos + nStartOffset].offset = (end_arrival->match_len) ? end_arrival->rep_offset : 0;
|
||||
end_arrival = &arrival[((end_arrival->from_pos + nStartOffset) << ARRIVALS_PER_POSITION_SHIFT_V2) + (end_arrival->from_slot - 1)];
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1451,7 +1451,7 @@ int lzsa_optimize_and_write_block_v2(lzsa_compressor *pCompressor, const unsigne
|
||||
}
|
||||
|
||||
/* Compress optimally and do break ties in favor of fewer tokens */
|
||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT);
|
||||
lzsa_optimize_forward_v2(pCompressor, pInWindow, nPreviousBlockSize, nPreviousBlockSize + nInDataSize, 1 /* reduce */, 0 /* use forward reps */, 1 << ARRIVALS_PER_POSITION_SHIFT_V2);
|
||||
}
|
||||
|
||||
/* Try to reduce final command set, wherever possible */
|
||||
|
@ -94,7 +94,7 @@ int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize,
|
||||
pCompressor->open_intervals = (unsigned int *)malloc((LCP_AND_TAG_MAX + 1) * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->open_intervals) {
|
||||
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT) * sizeof(lzsa_arrival));
|
||||
pCompressor->arrival = (lzsa_arrival *)malloc(((BLOCK_SIZE + 1) << ARRIVALS_PER_POSITION_SHIFT_V2) * sizeof(lzsa_arrival));
|
||||
|
||||
if (pCompressor->arrival) {
|
||||
pCompressor->best_match = (lzsa_match *)malloc(BLOCK_SIZE * sizeof(lzsa_match));
|
||||
|
@ -53,7 +53,8 @@ extern "C" {
|
||||
#define NARRIVALS_PER_POSITION_V2_SMALL 9
|
||||
#define NARRIVALS_PER_POSITION_V2_BIG 32
|
||||
#define NARRIVALS_PER_POSITION_V2_MAX 64
|
||||
#define ARRIVALS_PER_POSITION_SHIFT 6
|
||||
#define ARRIVALS_PER_POSITION_SHIFT_V1 3
|
||||
#define ARRIVALS_PER_POSITION_SHIFT_V2 6
|
||||
|
||||
#define NMATCHES_PER_INDEX_V1 16
|
||||
#define MATCHES_PER_INDEX_SHIFT_V1 4
|
||||
@ -78,11 +79,11 @@ typedef struct _lzsa_arrival {
|
||||
unsigned short rep_offset;
|
||||
short from_slot;
|
||||
|
||||
int from_pos;
|
||||
unsigned short from_pos;
|
||||
unsigned short rep_len;
|
||||
unsigned short match_len;
|
||||
unsigned short num_literals;
|
||||
int rep_pos;
|
||||
int num_literals;
|
||||
int score;
|
||||
} lzsa_arrival;
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user