2019-06-07 21:15:40 +00:00
|
|
|
/*
|
|
|
|
* shrink_context.h - compression context definitions
|
|
|
|
*
|
|
|
|
* Copyright (C) 2019 Emmanuel Marty
|
|
|
|
*
|
|
|
|
* This software is provided 'as-is', without any express or implied
|
|
|
|
* warranty. In no event will the authors be held liable for any damages
|
|
|
|
* arising from the use of this software.
|
|
|
|
*
|
|
|
|
* Permission is granted to anyone to use this software for any purpose,
|
|
|
|
* including commercial applications, and to alter it and redistribute it
|
|
|
|
* freely, subject to the following restrictions:
|
|
|
|
*
|
|
|
|
* 1. The origin of this software must not be misrepresented; you must not
|
|
|
|
* claim that you wrote the original software. If you use this software
|
|
|
|
* in a product, an acknowledgment in the product documentation would be
|
|
|
|
* appreciated but is not required.
|
|
|
|
* 2. Altered source versions must be plainly marked as such, and must not be
|
|
|
|
* misrepresented as being the original software.
|
|
|
|
* 3. This notice may not be removed or altered from any source distribution.
|
|
|
|
*/
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
|
|
|
|
*
|
|
|
|
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
|
|
|
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
|
|
|
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
|
|
|
* Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#ifndef _SHRINK_CONTEXT_H
|
|
|
|
#define _SHRINK_CONTEXT_H
|
|
|
|
|
|
|
|
#include "divsufsort.h"
|
|
|
|
|
2019-06-08 11:35:03 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
extern "C" {
|
|
|
|
#endif
|
|
|
|
|
2019-06-25 09:09:19 +00:00
|
|
|
#define LCP_BITS 14
|
2019-12-15 17:04:16 +00:00
|
|
|
#define TAG_BITS 4
|
2019-11-11 17:41:08 +00:00
|
|
|
#define LCP_MAX ((1U<<(LCP_BITS - TAG_BITS)) - 1)
|
2019-09-17 06:10:52 +00:00
|
|
|
#define LCP_AND_TAG_MAX (1U<<(LCP_BITS - 1))
|
2019-06-25 09:09:19 +00:00
|
|
|
#define LCP_SHIFT (31-LCP_BITS)
|
2019-06-07 21:15:40 +00:00
|
|
|
#define LCP_MASK (((1U<<LCP_BITS) - 1) << LCP_SHIFT)
|
|
|
|
#define POS_MASK ((1U<<LCP_SHIFT) - 1)
|
2019-06-25 09:09:19 +00:00
|
|
|
#define VISITED_FLAG 0x80000000
|
|
|
|
#define EXCL_VISITED_MASK 0x7fffffff
|
2019-06-07 21:15:40 +00:00
|
|
|
|
2020-07-14 20:36:38 +00:00
|
|
|
#define NARRIVALS_PER_POSITION_V1 8
|
|
|
|
#define NARRIVALS_PER_POSITION_V2_SMALL 9
|
|
|
|
#define NARRIVALS_PER_POSITION_V2_BIG 32
|
2021-10-12 13:52:11 +00:00
|
|
|
#define NARRIVALS_PER_POSITION_V2_MAX 64
|
2022-05-04 09:32:21 +00:00
|
|
|
#define ARRIVALS_PER_POSITION_SHIFT_V1 3
|
|
|
|
#define ARRIVALS_PER_POSITION_SHIFT_V2 6
|
2019-10-29 09:45:57 +00:00
|
|
|
|
2021-11-26 14:41:50 +00:00
|
|
|
#define NMATCHES_PER_INDEX_V1 16
|
|
|
|
#define MATCHES_PER_INDEX_SHIFT_V1 4
|
2019-10-29 09:45:57 +00:00
|
|
|
|
2019-10-29 11:09:14 +00:00
|
|
|
#define NMATCHES_PER_INDEX_V2 64
|
|
|
|
#define MATCHES_PER_INDEX_SHIFT_V2 6
|
2019-06-07 21:15:40 +00:00
|
|
|
|
2019-12-09 08:54:56 +00:00
|
|
|
#define LEAVE_ALONE_MATCH_SIZE 300
|
2019-12-15 22:36:51 +00:00
|
|
|
#define LEAVE_ALONE_MATCH_SIZE_SMALL 1000
|
2019-06-07 21:15:40 +00:00
|
|
|
|
2019-09-24 12:43:17 +00:00
|
|
|
#define MODESWITCH_PENALTY 3
|
2019-06-07 21:15:40 +00:00
|
|
|
|
|
|
|
/** One match */
|
|
|
|
typedef struct _lzsa_match {
|
|
|
|
unsigned short length;
|
|
|
|
unsigned short offset;
|
|
|
|
} lzsa_match;
|
|
|
|
|
2019-08-26 22:51:34 +00:00
|
|
|
/** Forward arrival slot */
|
2022-01-02 06:15:23 +00:00
|
|
|
typedef struct _lzsa_arrival {
|
2019-08-26 22:51:34 +00:00
|
|
|
int cost;
|
2019-11-20 14:40:11 +00:00
|
|
|
unsigned short rep_offset;
|
2019-08-26 22:51:34 +00:00
|
|
|
short from_slot;
|
|
|
|
|
2022-05-04 09:32:21 +00:00
|
|
|
unsigned short from_pos;
|
2019-10-22 10:37:46 +00:00
|
|
|
unsigned short rep_len;
|
2020-07-24 15:14:01 +00:00
|
|
|
unsigned short match_len;
|
2022-05-04 09:32:21 +00:00
|
|
|
unsigned short num_literals;
|
2019-10-22 10:37:46 +00:00
|
|
|
int rep_pos;
|
2019-10-11 07:05:58 +00:00
|
|
|
int score;
|
2019-08-26 22:51:34 +00:00
|
|
|
} lzsa_arrival;
|
|
|
|
|
2019-10-09 16:20:22 +00:00
|
|
|
/** Compression statistics */
|
|
|
|
typedef struct _lzsa_stats {
|
|
|
|
int min_literals;
|
|
|
|
int max_literals;
|
|
|
|
int total_literals;
|
|
|
|
|
|
|
|
int min_offset;
|
|
|
|
int max_offset;
|
|
|
|
int num_rep_offsets;
|
|
|
|
int total_offsets;
|
|
|
|
|
|
|
|
int min_match_len;
|
|
|
|
int max_match_len;
|
|
|
|
int total_match_lens;
|
|
|
|
|
|
|
|
int min_rle1_len;
|
|
|
|
int max_rle1_len;
|
|
|
|
int total_rle1_lens;
|
|
|
|
|
|
|
|
int min_rle2_len;
|
|
|
|
int max_rle2_len;
|
|
|
|
int total_rle2_lens;
|
|
|
|
|
|
|
|
int literals_divisor;
|
|
|
|
int match_divisor;
|
|
|
|
int rle1_divisor;
|
|
|
|
int rle2_divisor;
|
|
|
|
} lzsa_stats;
|
|
|
|
|
2019-06-07 21:15:40 +00:00
|
|
|
/** Compression context */
|
|
|
|
typedef struct _lzsa_compressor {
|
|
|
|
divsufsort_ctx_t divsufsort_context;
|
|
|
|
unsigned int *intervals;
|
|
|
|
unsigned int *pos_data;
|
|
|
|
unsigned int *open_intervals;
|
2019-10-11 07:05:58 +00:00
|
|
|
lzsa_match *match;
|
2019-06-07 21:15:40 +00:00
|
|
|
lzsa_match *best_match;
|
2019-08-26 22:51:34 +00:00
|
|
|
lzsa_arrival *arrival;
|
2022-10-19 08:39:40 +00:00
|
|
|
unsigned char *rep_slot_handled_mask;
|
|
|
|
unsigned char *rep_len_handled_mask;
|
2020-08-18 07:13:54 +00:00
|
|
|
int *first_offset_for_byte;
|
|
|
|
int *next_offset_for_pos;
|
2021-06-04 17:11:22 +00:00
|
|
|
int *offset_cache;
|
2019-06-07 21:15:40 +00:00
|
|
|
int min_match_size;
|
|
|
|
int format_version;
|
|
|
|
int flags;
|
2019-07-24 13:43:44 +00:00
|
|
|
int safe_dist;
|
2019-06-07 21:15:40 +00:00
|
|
|
int num_commands;
|
2019-10-09 16:20:22 +00:00
|
|
|
lzsa_stats stats;
|
2019-06-07 21:15:40 +00:00
|
|
|
} lzsa_compressor;
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Initialize compression context
|
|
|
|
*
|
|
|
|
* @param pCompressor compression context to initialize
|
|
|
|
* @param nMaxWindowSize maximum size of input data window (previously compressed bytes + bytes to compress)
|
|
|
|
* @param nMinMatchSize minimum match size (cannot be less than MIN_MATCH_SIZE)
|
2022-10-17 07:42:39 +00:00
|
|
|
* @param nFormatVersion version of format to use (1-2)
|
2019-06-07 21:15:40 +00:00
|
|
|
* @param nFlags compression flags
|
|
|
|
*
|
|
|
|
* @return 0 for success, non-zero for failure
|
|
|
|
*/
|
|
|
|
int lzsa_compressor_init(lzsa_compressor *pCompressor, const int nMaxWindowSize, const int nMinMatchSize, const int nFormatVersion, const int nFlags);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Clean up compression context and free up any associated resources
|
|
|
|
*
|
|
|
|
* @param pCompressor compression context to clean up
|
|
|
|
*/
|
|
|
|
void lzsa_compressor_destroy(lzsa_compressor *pCompressor);
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Compress one block of data
|
|
|
|
*
|
|
|
|
* @param pCompressor compression context
|
|
|
|
* @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
|
|
|
|
* @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
|
|
|
|
* @param nInDataSize number of input bytes to compress
|
|
|
|
* @param pOutData pointer to output buffer
|
|
|
|
* @param nMaxOutDataSize maximum size of output buffer, in bytes
|
|
|
|
*
|
|
|
|
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
|
|
|
*/
|
2019-07-24 18:08:23 +00:00
|
|
|
int lzsa_compressor_shrink_block(lzsa_compressor *pCompressor, unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize);
|
2019-06-07 21:15:40 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Get the number of compression commands issued in compressed data blocks
|
|
|
|
*
|
|
|
|
* @return number of commands
|
|
|
|
*/
|
|
|
|
int lzsa_compressor_get_command_count(lzsa_compressor *pCompressor);
|
|
|
|
|
2019-06-08 11:35:03 +00:00
|
|
|
#ifdef __cplusplus
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
|
2019-06-07 21:15:40 +00:00
|
|
|
#endif /* _SHRINK_CONTEXT_H */
|