Fix matchfinder limitation

This commit is contained in:
Emmanuel Marty 2019-06-25 11:09:19 +02:00 committed by GitHub
parent 5a531cd4ce
commit 7867618f87
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 13 additions and 16 deletions

View File

@ -46,7 +46,7 @@
#define OPT_RAW 2
#define OPT_FAVOR_RATIO 4
#define TOOL_VERSION "1.0.1"
#define TOOL_VERSION "1.0.2"
/*---------------------------------------------------------------------------*/

View File

@ -192,33 +192,27 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
/* Ascend until we reach a visited interval, the root, or a child of the
* root. Link unvisited intervals to the current suffix as we go. */
while ((super_ref = intervals[ref & POS_MASK]) & LCP_MASK) {
intervals[ref & POS_MASK] = nOffset;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
ref = super_ref;
}
if (super_ref == 0) {
/* In this case, the current interval may be any of:
* (1) the root;
* (2) an unvisited child of the root;
* (3) an interval last visited by suffix 0
*
* We could avoid the ambiguity with (3) by using an lcp
* placeholder value other than 0 to represent "visited", but
* it's fastest to use 0. So we just don't allow matches with
* position 0. */
* (2) an unvisited child of the root */
if (ref != 0) /* Not the root? */
intervals[ref & POS_MASK] = nOffset;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
return 0;
}
/* Ascend indirectly via pos_data[] links. */
match_pos = super_ref;
match_pos = super_ref & EXCL_VISITED_MASK;
matchptr = pMatches;
for (;;) {
while ((super_ref = pos_data[match_pos]) > ref)
match_pos = intervals[super_ref & POS_MASK];
intervals[ref & POS_MASK] = nOffset;
match_pos = intervals[super_ref & POS_MASK] & EXCL_VISITED_MASK;
intervals[ref & POS_MASK] = nOffset | VISITED_FLAG;
pos_data[match_pos] = ref;
if ((matchptr - pMatches) < nMaxMatches) {
@ -234,7 +228,7 @@ int lzsa_find_matches_at(lzsa_compressor *pCompressor, const int nOffset, lzsa_m
if (super_ref == 0)
break;
ref = super_ref;
match_pos = intervals[ref & POS_MASK];
match_pos = intervals[ref & POS_MASK] & EXCL_VISITED_MASK;
}
return (int)(matchptr - pMatches);
@ -263,6 +257,7 @@ void lzsa_skip_matches(lzsa_compressor *pCompressor, const int nStartOffset, con
* the optimizer to look at.
*
* @param pCompressor compression context
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
* @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
* @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
*/

View File

@ -39,11 +39,13 @@
extern "C" {
#endif
#define LCP_BITS 15
#define LCP_BITS 14
#define LCP_MAX (1U<<(LCP_BITS - 1))
#define LCP_SHIFT (32-LCP_BITS)
#define LCP_SHIFT (31-LCP_BITS)
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
#define VISITED_FLAG 0x80000000
#define EXCL_VISITED_MASK 0x7fffffff
#define NMATCHES_PER_OFFSET 8
#define MATCHES_PER_OFFSET_SHIFT 3