/*
 * shrink_block_v1.c - LZSA1 block compressor implementation
 *
 * Copyright (C) 2019 Emmanuel Marty
 *
 * This software is provided 'as-is', without any express or implied
 * warranty.  In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */
/*
 * Uses the libdivsufsort library Copyright (c) 2003-2008 Yuta Mori
 *
 * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
 * With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
 * Also with ideas from smallz4 by Stephan Brumme. https://create.stephan-brumme.com/smallz4/
 *
 */
# include <stdlib.h>
# include <string.h>
# include "lib.h"
# include "shrink_block_v1.h"
# include "format.h"
/**
 * Compute how many extra bits, beyond the token itself, are needed to encode a literals run length
 *
 * @param nLength literals length
 *
 * @return 0 when the length is below LITERALS_RUN_LEN_V1, otherwise 8, 16 or 24 extra bits
 */
static inline int lzsa_get_literals_varlen_size_v1(const int nLength) {
   if (nLength < LITERALS_RUN_LEN_V1)
      return 0;

   if (nLength < 256)
      return 8;

   if (nLength < 512)
      return 16;

   return 24;
}
/**
 * Emit the extra literals length bytes into the output (compressed) buffer. No bounds checking is
 * performed here: the caller must first make sure that the bytes fit.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength literals length
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_literals_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   /* Short runs are stored entirely in the token: nothing extra to write */
   if (nLength < LITERALS_RUN_LEN_V1)
      return nOutOffset;

   if (nLength < 256) {
      /* Single byte holding the excess over the token's maximum */
      pOutData[nOutOffset++] = nLength - LITERALS_RUN_LEN_V1;
      return nOutOffset;
   }

   if (nLength < 512) {
      /* Marker 250 followed by (length - 256) */
      pOutData[nOutOffset++] = 250;
      pOutData[nOutOffset++] = nLength - 256;
      return nOutOffset;
   }

   /* Marker 249 followed by the length, low byte first */
   pOutData[nOutOffset++] = 249;
   pOutData[nOutOffset++] = nLength & 0xff;
   pOutData[nOutOffset++] = (nLength >> 8) & 0xff;
   return nOutOffset;
}
/**
 * Compute how many extra bits, beyond the token itself, are needed to encode an encoded match length
 *
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return 0 when the encoded length is below MATCH_RUN_LEN_V1, otherwise 8, 16 or 24 extra bits
 */
static inline int lzsa_get_match_varlen_size_v1(const int nLength) {
   if (nLength < MATCH_RUN_LEN_V1)
      return 0;

   /* The size thresholds apply to the actual (not the encoded) match length */
   const int nActualLength = nLength + MIN_MATCH_SIZE_V1;

   if (nActualLength < 256)
      return 8;

   if (nActualLength < 512)
      return 16;

   return 24;
}
/**
 * Emit the extra encoded match length bytes into the output (compressed) buffer. No bounds checking is
 * performed here: the caller must first make sure that the bytes fit.
 *
 * @param pOutData pointer to output buffer
 * @param nOutOffset current write index into output buffer
 * @param nLength encoded match length (actual match length - MIN_MATCH_SIZE_V1)
 *
 * @return updated write index into output buffer
 */
static inline int lzsa_write_match_varlen_v1(unsigned char *pOutData, int nOutOffset, int nLength) {
   /* Short matches are stored entirely in the token: nothing extra to write */
   if (nLength < MATCH_RUN_LEN_V1)
      return nOutOffset;

   const int nActualLength = nLength + MIN_MATCH_SIZE_V1;

   if (nActualLength < 256) {
      /* Single byte holding the excess over the token's maximum */
      pOutData[nOutOffset++] = nLength - MATCH_RUN_LEN_V1;
   }
   else if (nActualLength < 512) {
      /* Marker 239 followed by (actual length - 256) */
      pOutData[nOutOffset++] = 239;
      pOutData[nOutOffset++] = nActualLength - 256;
   }
   else {
      /* Marker 238 followed by the actual length, low byte first */
      pOutData[nOutOffset++] = 238;
      pOutData[nOutOffset++] = nActualLength & 0xff;
      pOutData[nOutOffset++] = (nActualLength >> 8) & 0xff;
   }

   return nOutOffset;
}
/**
 * Get the cost, in bits, of encoding a match offset
 *
 * @param nMatchOffset offset to get cost of
 *
 * @return 8 for offsets up to and including 256, 16 for larger offsets
 */
static inline int lzsa_get_offset_cost_v1(const unsigned int nMatchOffset) {
   if (nMatchOffset > 256)
      return 16;

   return 8;
}
/**
* Attempt to pick optimal matches using a forward arrivals parser , so as to produce the smallest possible output that decompresses to the same input
*
* @ param pCompressor compression context
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
*/
2019-10-19 13:10:41 +02:00
static void lzsa_optimize_forward_v1 ( lzsa_compressor * pCompressor , lzsa_match * pBestMatch , const int nStartOffset , const int nEndOffset , const int nReduce ) {
2019-11-12 00:30:24 +01:00
lzsa_arrival * arrival = pCompressor - > arrival - ( nStartOffset < < MATCHES_PER_ARRIVAL_SHIFT ) ;
2019-09-23 20:24:50 +02:00
const int nMinMatchSize = pCompressor - > min_match_size ;
const int nFavorRatio = ( pCompressor - > flags & LZSA_FLAG_FAVOR_RATIO ) ? 1 : 0 ;
2019-10-19 13:10:41 +02:00
const int nDisableScore = nReduce ? 0 : ( 2 * BLOCK_SIZE ) ;
2019-08-27 00:51:34 +02:00
int i , j , n ;
2019-11-12 00:30:24 +01:00
if ( ( nEndOffset - nStartOffset ) > BLOCK_SIZE ) return ;
2019-10-29 10:45:57 +01:00
memset ( arrival + ( nStartOffset < < MATCHES_PER_ARRIVAL_SHIFT ) , 0 , sizeof ( lzsa_arrival ) * ( ( nEndOffset - nStartOffset ) < < MATCHES_PER_ARRIVAL_SHIFT ) ) ;
2019-08-27 00:51:34 +02:00
2019-10-29 10:45:57 +01:00
arrival [ nStartOffset < < MATCHES_PER_ARRIVAL_SHIFT ] . from_slot = - 1 ;
2019-08-27 00:51:34 +02:00
for ( i = nStartOffset ; i ! = ( nEndOffset - 1 ) ; i + + ) {
2019-10-19 13:10:41 +02:00
int m ;
2019-08-27 00:51:34 +02:00
2019-11-13 00:57:09 +01:00
for ( j = 0 ; j < NMATCHES_PER_ARRIVAL_SMALL & & arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . from_slot ; j + + ) {
2019-10-29 10:45:57 +01:00
int nPrevCost = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . cost ;
2019-08-27 00:51:34 +02:00
int nCodingChoiceCost = nPrevCost + 8 /* literal */ ;
2019-10-29 10:45:57 +01:00
int nScore = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . score + 1 ;
int nNumLiterals = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . num_literals + 1 ;
2019-08-27 00:51:34 +02:00
if ( nNumLiterals = = LITERALS_RUN_LEN_V1 | | nNumLiterals = = 256 | | nNumLiterals = = 512 ) {
nCodingChoiceCost + = 8 ;
}
2019-09-23 20:24:50 +02:00
if ( ! nFavorRatio & & nNumLiterals = = 1 )
2019-09-24 14:43:17 +02:00
nCodingChoiceCost + = MODESWITCH_PENALTY ;
2019-09-23 20:24:50 +02:00
2019-11-13 00:57:09 +01:00
for ( n = 0 ; n < NMATCHES_PER_ARRIVAL_SMALL /* we only need the literals + short match cost + long match cost cases */ ; n + + ) {
2019-10-29 10:45:57 +01:00
lzsa_arrival * pDestArrival = & arrival [ ( ( i + 1 ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] ;
2019-10-19 13:10:41 +02:00
if ( pDestArrival - > from_slot = = 0 | |
nCodingChoiceCost < pDestArrival - > cost | |
( nCodingChoiceCost = = pDestArrival - > cost & & nScore < ( pDestArrival - > score + nDisableScore ) ) ) {
2019-10-29 10:45:57 +01:00
memmove ( & arrival [ ( ( i + 1 ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n + 1 ] ,
& arrival [ ( ( i + 1 ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] ,
2019-11-13 00:57:09 +01:00
sizeof ( lzsa_arrival ) * ( NMATCHES_PER_ARRIVAL_SMALL - n - 1 ) ) ;
2019-10-19 13:10:41 +02:00
pDestArrival - > cost = nCodingChoiceCost ;
pDestArrival - > from_pos = i ;
pDestArrival - > from_slot = j + 1 ;
pDestArrival - > match_offset = 0 ;
pDestArrival - > match_len = 0 ;
pDestArrival - > num_literals = nNumLiterals ;
pDestArrival - > score = nScore ;
2019-10-29 10:45:57 +01:00
pDestArrival - > rep_offset = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . rep_offset ;
2019-10-19 13:10:41 +02:00
break ;
}
2019-08-27 00:51:34 +02:00
}
}
2019-11-12 00:30:24 +01:00
const lzsa_match * match = pCompressor - > match + ( ( i - nStartOffset ) < < MATCHES_PER_INDEX_SHIFT_V1 ) ;
2019-09-24 00:21:17 +02:00
2019-10-29 10:45:57 +01:00
for ( m = 0 ; m < NMATCHES_PER_INDEX_V1 & & match [ m ] . length ; m + + ) {
2019-09-24 00:21:17 +02:00
int nMatchLen = match [ m ] . length ;
int nMatchOffsetCost = lzsa_get_offset_cost_v1 ( match [ m ] . offset ) ;
2019-08-27 00:51:34 +02:00
int nStartingMatchLen , k ;
if ( ( i + nMatchLen ) > ( nEndOffset - LAST_LITERALS ) )
nMatchLen = nEndOffset - LAST_LITERALS - i ;
if ( nMatchLen > = LEAVE_ALONE_MATCH_SIZE )
nStartingMatchLen = nMatchLen ;
else
nStartingMatchLen = nMinMatchSize ;
for ( k = nStartingMatchLen ; k < = nMatchLen ; k + + ) {
int nMatchLenCost = lzsa_get_match_varlen_size_v1 ( k - MIN_MATCH_SIZE_V1 ) ;
2019-11-13 00:57:09 +01:00
for ( j = 0 ; j < NMATCHES_PER_ARRIVAL_SMALL & & arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . from_slot ; j + + ) {
2019-10-29 10:45:57 +01:00
int nPrevCost = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . cost ;
2019-08-27 00:51:34 +02:00
int nCodingChoiceCost = nPrevCost + 8 /* token */ /* the actual cost of the literals themselves accumulates up the chain */ + nMatchOffsetCost + nMatchLenCost ;
2019-10-29 10:45:57 +01:00
int nScore = arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . score + 5 ;
2019-09-19 12:57:39 +02:00
int exists = 0 ;
2019-08-27 00:51:34 +02:00
2019-10-29 10:45:57 +01:00
if ( ! nFavorRatio & & ! arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + j ] . num_literals )
2019-09-24 14:43:17 +02:00
nCodingChoiceCost + = MODESWITCH_PENALTY ;
2019-09-23 20:24:50 +02:00
2019-09-19 12:57:39 +02:00
for ( n = 0 ;
2019-11-13 00:57:09 +01:00
n < NMATCHES_PER_ARRIVAL_SMALL & & arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] . from_slot & & arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] . cost < = nCodingChoiceCost ;
2019-09-19 12:57:39 +02:00
n + + ) {
2019-10-29 10:45:57 +01:00
if ( lzsa_get_offset_cost_v1 ( arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] . rep_offset ) = = lzsa_get_offset_cost_v1 ( match [ m ] . offset ) ) {
2019-09-19 12:57:39 +02:00
exists = 1 ;
break ;
}
}
2019-11-13 00:57:09 +01:00
for ( n = 0 ; ! exists & & n < NMATCHES_PER_ARRIVAL_SMALL /* we only need the literals + short match cost + long match cost cases */ ; n + + ) {
2019-10-29 10:45:57 +01:00
lzsa_arrival * pDestArrival = & arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] ;
2019-08-27 00:51:34 +02:00
if ( pDestArrival - > from_slot = = 0 | |
2019-10-19 13:10:41 +02:00
nCodingChoiceCost < pDestArrival - > cost | |
( nCodingChoiceCost = = pDestArrival - > cost & & nScore < ( pDestArrival - > score + nDisableScore ) ) ) {
2019-10-29 10:45:57 +01:00
memmove ( & arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n + 1 ] ,
& arrival [ ( ( i + k ) < < MATCHES_PER_ARRIVAL_SHIFT ) + n ] ,
2019-11-13 00:57:09 +01:00
sizeof ( lzsa_arrival ) * ( NMATCHES_PER_ARRIVAL_SMALL - n - 1 ) ) ;
2019-08-27 00:51:34 +02:00
2019-09-19 12:57:39 +02:00
pDestArrival - > cost = nCodingChoiceCost ;
pDestArrival - > from_pos = i ;
pDestArrival - > from_slot = j + 1 ;
2019-09-24 00:21:17 +02:00
pDestArrival - > match_offset = match [ m ] . offset ;
2019-09-19 12:57:39 +02:00
pDestArrival - > match_len = k ;
pDestArrival - > num_literals = 0 ;
2019-10-19 13:10:41 +02:00
pDestArrival - > score = nScore ;
2019-09-24 00:21:17 +02:00
pDestArrival - > rep_offset = match [ m ] . offset ;
2019-08-27 00:51:34 +02:00
break ;
}
}
}
}
}
}
2019-10-29 10:45:57 +01:00
lzsa_arrival * end_arrival = & arrival [ ( i < < MATCHES_PER_ARRIVAL_SHIFT ) + 0 ] ;
2019-10-19 13:10:41 +02:00
pBestMatch [ i ] . length = 0 ;
pBestMatch [ i ] . offset = 0 ;
2019-08-27 00:51:34 +02:00
while ( end_arrival - > from_slot > 0 & & end_arrival - > from_pos > = 0 ) {
2019-10-19 13:10:41 +02:00
pBestMatch [ end_arrival - > from_pos ] . length = end_arrival - > match_len ;
pBestMatch [ end_arrival - > from_pos ] . offset = end_arrival - > match_offset ;
2019-08-27 00:51:34 +02:00
2019-10-29 10:45:57 +01:00
end_arrival = & arrival [ ( end_arrival - > from_pos < < MATCHES_PER_ARRIVAL_SHIFT ) + ( end_arrival - > from_slot - 1 ) ] ;
2019-08-27 00:51:34 +02:00
}
}
2019-06-07 23:15:40 +02:00
/**
* Attempt to minimize the number of commands issued in the compressed data block , in order to speed up decompression without
* impacting the compression ratio
*
* @ param pCompressor compression context
2019-10-24 13:05:32 +02:00
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
2019-10-19 13:10:41 +02:00
* @ param pBestMatch optimal matches to emit
2019-06-07 23:15:40 +02:00
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
*
* @ return non - zero if the number of tokens was reduced , 0 if it wasn ' t
*/
2019-10-24 13:05:32 +02:00
static int lzsa_optimize_command_count_v1 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , lzsa_match * pBestMatch , const int nStartOffset , const int nEndOffset ) {
2019-06-07 23:15:40 +02:00
int i ;
int nNumLiterals = 0 ;
int nDidReduce = 0 ;
for ( i = nStartOffset ; i < nEndOffset ; ) {
2019-10-19 13:10:41 +02:00
lzsa_match * pMatch = pBestMatch + i ;
2019-06-07 23:15:40 +02:00
if ( pMatch - > length > = MIN_MATCH_SIZE_V1 ) {
2019-09-22 20:34:08 +02:00
if ( pMatch - > length < = 9 /* Don't waste time considering large matches, they will always win over literals */ & &
( i + pMatch - > length ) < nEndOffset /* Don't consider the last token in the block, we can only reduce a match inbetween other tokens */ ) {
int nNextIndex = i + pMatch - > length ;
int nNextLiterals = 0 ;
2019-10-19 13:10:41 +02:00
while ( nNextIndex < nEndOffset & & pBestMatch [ nNextIndex ] . length < MIN_MATCH_SIZE_V1 ) {
2019-09-22 20:34:08 +02:00
nNextLiterals + + ;
nNextIndex + + ;
2019-06-07 23:15:40 +02:00
}
2019-09-22 20:34:08 +02:00
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match, or the end of the input. Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
if ( ( 8 /* token */ + lzsa_get_literals_varlen_size_v1 ( nNumLiterals ) + ( ( pMatch - > offset < = 256 ) ? 8 : 16 ) /* match offset */ + lzsa_get_match_varlen_size_v1 ( pMatch - > length - MIN_MATCH_SIZE_V1 ) +
8 /* token */ + lzsa_get_literals_varlen_size_v1 ( nNextLiterals ) ) > =
( 8 /* token */ + ( pMatch - > length < < 3 ) + lzsa_get_literals_varlen_size_v1 ( nNumLiterals + pMatch - > length + nNextLiterals ) ) ) {
/* Reduce */
int nMatchLen = pMatch - > length ;
int j ;
for ( j = 0 ; j < nMatchLen ; j + + ) {
2019-10-19 13:10:41 +02:00
pBestMatch [ i + j ] . length = 0 ;
2019-06-07 23:15:40 +02:00
}
2019-09-22 20:34:08 +02:00
nDidReduce = 1 ;
continue ;
2019-06-07 23:15:40 +02:00
}
}
2019-11-11 18:41:08 +01:00
if ( ( i + pMatch - > length ) < nEndOffset & & pMatch - > offset > 0 & & pMatch - > length > = MIN_MATCH_SIZE_V1 & &
pBestMatch [ i + pMatch - > length ] . offset > 0 & &
2019-10-24 13:05:32 +02:00
pBestMatch [ i + pMatch - > length ] . length > = MIN_MATCH_SIZE_V1 & &
2019-11-11 18:41:08 +01:00
( pMatch - > length + pBestMatch [ i + pMatch - > length ] . length ) > = LEAVE_ALONE_MATCH_SIZE & &
2019-10-24 13:05:32 +02:00
( pMatch - > length + pBestMatch [ i + pMatch - > length ] . length ) < = MAX_VARLEN & &
( i + pMatch - > length ) > pMatch - > offset & &
2019-11-11 18:41:08 +01:00
( i + pMatch - > length ) > pBestMatch [ i + pMatch - > length ] . offset & &
( i + pMatch - > length + pBestMatch [ i + pMatch - > length ] . length ) < nEndOffset & &
2019-10-24 13:05:32 +02:00
! memcmp ( pInWindow + i - pMatch - > offset + pMatch - > length ,
pInWindow + i + pMatch - > length - pBestMatch [ i + pMatch - > length ] . offset ,
pBestMatch [ i + pMatch - > length ] . length ) ) {
2019-06-07 23:15:40 +02:00
2019-11-11 18:41:08 +01:00
int nCurPartialSize = lzsa_get_match_varlen_size_v1 ( pMatch - > length - MIN_MATCH_SIZE_V1 ) ;
nCurPartialSize + = 8 /* token */ + lzsa_get_literals_varlen_size_v1 ( 0 ) + ( ( pBestMatch [ i + pMatch - > length ] . offset < = 256 ) ? 8 : 16 ) /* match offset */ + lzsa_get_match_varlen_size_v1 ( pBestMatch [ i + pMatch - > length ] . length - MIN_MATCH_SIZE_V1 ) ;
2019-06-07 23:15:40 +02:00
2019-11-11 18:41:08 +01:00
int nReducedPartialSize = lzsa_get_match_varlen_size_v1 ( pMatch - > length + pBestMatch [ i + pMatch - > length ] . length - MIN_MATCH_SIZE_V1 ) ;
if ( nCurPartialSize > = nReducedPartialSize ) {
int nMatchLen = pMatch - > length ;
/* Join */
pMatch - > length + = pBestMatch [ i + nMatchLen ] . length ;
pBestMatch [ i + nMatchLen ] . offset = 0 ;
pBestMatch [ i + nMatchLen ] . length = - 1 ;
continue ;
}
2019-06-07 23:15:40 +02:00
}
2019-09-22 20:34:08 +02:00
i + = pMatch - > length ;
nNumLiterals = 0 ;
2019-06-07 23:15:40 +02:00
}
else {
nNumLiterals + + ;
i + + ;
}
}
return nDidReduce ;
}
2019-10-19 13:10:41 +02:00
/**
 * Get compressed data block size
 *
 * @param pCompressor compression context
 * @param pBestMatch optimal matches to emit
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 *
 * @return size, in bits, of the compressed data for this parse
 */
static int lzsa_get_compressed_size_v1(lzsa_compressor *pCompressor, lzsa_match *pBestMatch, const int nStartOffset, const int nEndOffset) {
   int nPos = nStartOffset;
   int nPendingLiterals = 0;
   int nTotalBits = 0;

   while (nPos < nEndOffset) {
      const lzsa_match *pCur = pBestMatch + nPos;

      /* Not a match: one more literal for the next command */
      if (pCur->length < MIN_MATCH_SIZE_V1) {
         nPendingLiterals++;
         nPos++;
         continue;
      }

      const int nMatchOffset = pCur->offset;
      const int nMatchLen = pCur->length;
      const int nOffsetBits = (nMatchOffset <= 256) ? 8 : 16;

      nTotalBits += 8 /* token */
                  + lzsa_get_literals_varlen_size_v1(nPendingLiterals)
                  + (nPendingLiterals << 3)
                  + nOffsetBits /* match offset */
                  + lzsa_get_match_varlen_size_v1(nMatchLen - MIN_MATCH_SIZE_V1);

      nPendingLiterals = 0;
      nPos += nMatchLen;
   }

   /* Final command: token plus whatever literals are left over */
   nTotalBits += 8 /* token */ + lzsa_get_literals_varlen_size_v1(nPendingLiterals) + (nPendingLiterals << 3);

   /* Raw blocks carry 4 extra bytes */
   if (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)
      nTotalBits += 8 * 4;

   return nTotalBits;
}
/**
* Emit block of compressed data
*
* @ param pCompressor compression context
2019-10-19 13:10:41 +02:00
* @ param pBestMatch optimal matches to emit
2019-06-07 23:15:40 +02:00
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
* @ param pOutData pointer to output buffer
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
*
* @ return size of compressed data in output buffer , or - 1 if the data is uncompressible
*/
2019-10-19 13:10:41 +02:00
static int lzsa_write_block_v1 ( lzsa_compressor * pCompressor , lzsa_match * pBestMatch , const unsigned char * pInWindow , const int nStartOffset , const int nEndOffset , unsigned char * pOutData , const int nMaxOutDataSize ) {
2019-06-07 23:15:40 +02:00
int i ;
int nNumLiterals = 0 ;
int nInFirstLiteralOffset = 0 ;
int nOutOffset = 0 ;
for ( i = nStartOffset ; i < nEndOffset ; ) {
2019-10-19 13:10:41 +02:00
const lzsa_match * pMatch = pBestMatch + i ;
2019-06-07 23:15:40 +02:00
if ( pMatch - > length > = MIN_MATCH_SIZE_V1 ) {
int nMatchOffset = pMatch - > offset ;
int nMatchLen = pMatch - > length ;
int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V1 ;
int nTokenLiteralsLen = ( nNumLiterals > = LITERALS_RUN_LEN_V1 ) ? LITERALS_RUN_LEN_V1 : nNumLiterals ;
int nTokenMatchLen = ( nEncodedMatchLen > = MATCH_RUN_LEN_V1 ) ? MATCH_RUN_LEN_V1 : nEncodedMatchLen ;
int nTokenLongOffset = ( nMatchOffset < = 256 ) ? 0x00 : 0x80 ;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1 ( nNumLiterals ) + ( nNumLiterals < < 3 ) + ( nTokenLongOffset ? 16 : 8 ) /* match offset */ + lzsa_get_match_varlen_size_v1 ( nEncodedMatchLen ) ;
if ( ( nOutOffset + ( nCommandSize > > 3 ) ) > nMaxOutDataSize )
return - 1 ;
if ( nMatchOffset < MIN_OFFSET | | nMatchOffset > MAX_OFFSET )
return - 1 ;
pOutData [ nOutOffset + + ] = nTokenLongOffset | ( nTokenLiteralsLen < < 4 ) | nTokenMatchLen ;
nOutOffset = lzsa_write_literals_varlen_v1 ( pOutData , nOutOffset , nNumLiterals ) ;
2019-10-09 18:20:22 +02:00
if ( nNumLiterals < pCompressor - > stats . min_literals | | pCompressor - > stats . min_literals = = - 1 )
pCompressor - > stats . min_literals = nNumLiterals ;
if ( nNumLiterals > pCompressor - > stats . max_literals )
pCompressor - > stats . max_literals = nNumLiterals ;
pCompressor - > stats . total_literals + = nNumLiterals ;
pCompressor - > stats . literals_divisor + + ;
2019-06-07 23:15:40 +02:00
if ( nNumLiterals ! = 0 ) {
memcpy ( pOutData + nOutOffset , pInWindow + nInFirstLiteralOffset , nNumLiterals ) ;
nOutOffset + = nNumLiterals ;
nNumLiterals = 0 ;
}
pOutData [ nOutOffset + + ] = ( - nMatchOffset ) & 0xff ;
if ( nTokenLongOffset ) {
pOutData [ nOutOffset + + ] = ( - nMatchOffset ) > > 8 ;
}
nOutOffset = lzsa_write_match_varlen_v1 ( pOutData , nOutOffset , nEncodedMatchLen ) ;
2019-10-09 18:20:22 +02:00
if ( nMatchOffset < pCompressor - > stats . min_offset | | pCompressor - > stats . min_offset = = - 1 )
pCompressor - > stats . min_offset = nMatchOffset ;
if ( nMatchOffset > pCompressor - > stats . max_offset )
pCompressor - > stats . max_offset = nMatchOffset ;
pCompressor - > stats . total_offsets + = nMatchOffset ;
if ( nMatchLen < pCompressor - > stats . min_match_len | | pCompressor - > stats . min_match_len = = - 1 )
pCompressor - > stats . min_match_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_match_len )
pCompressor - > stats . max_match_len = nMatchLen ;
pCompressor - > stats . total_match_lens + = nMatchLen ;
pCompressor - > stats . match_divisor + + ;
if ( nMatchOffset = = 1 ) {
if ( nMatchLen < pCompressor - > stats . min_rle1_len | | pCompressor - > stats . min_rle1_len = = - 1 )
pCompressor - > stats . min_rle1_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_rle1_len )
pCompressor - > stats . max_rle1_len = nMatchLen ;
pCompressor - > stats . total_rle1_lens + = nMatchLen ;
pCompressor - > stats . rle1_divisor + + ;
}
else if ( nMatchOffset = = 2 ) {
if ( nMatchLen < pCompressor - > stats . min_rle2_len | | pCompressor - > stats . min_rle2_len = = - 1 )
pCompressor - > stats . min_rle2_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_rle2_len )
pCompressor - > stats . max_rle2_len = nMatchLen ;
pCompressor - > stats . total_rle2_lens + = nMatchLen ;
pCompressor - > stats . rle2_divisor + + ;
}
2019-06-07 23:15:40 +02:00
i + = nMatchLen ;
2019-07-24 15:43:44 +02:00
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
int nCurSafeDist = ( i - nStartOffset ) - nOutOffset ;
if ( nCurSafeDist > = 0 & & pCompressor - > safe_dist < nCurSafeDist )
pCompressor - > safe_dist = nCurSafeDist ;
}
2019-06-07 23:15:40 +02:00
pCompressor - > num_commands + + ;
}
else {
if ( nNumLiterals = = 0 )
nInFirstLiteralOffset = i ;
nNumLiterals + + ;
i + + ;
}
}
{
int nTokenLiteralsLen = ( nNumLiterals > = LITERALS_RUN_LEN_V1 ) ? LITERALS_RUN_LEN_V1 : nNumLiterals ;
int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1 ( nNumLiterals ) + ( nNumLiterals < < 3 ) ;
if ( ( nOutOffset + ( nCommandSize > > 3 ) ) > nMaxOutDataSize )
return - 1 ;
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK )
pOutData [ nOutOffset + + ] = ( nTokenLiteralsLen < < 4 ) | 0x0f ;
else
pOutData [ nOutOffset + + ] = ( nTokenLiteralsLen < < 4 ) | 0x00 ;
nOutOffset = lzsa_write_literals_varlen_v1 ( pOutData , nOutOffset , nNumLiterals ) ;
2019-10-09 18:20:22 +02:00
if ( nNumLiterals < pCompressor - > stats . min_literals | | pCompressor - > stats . min_literals = = - 1 )
pCompressor - > stats . min_literals = nNumLiterals ;
if ( nNumLiterals > pCompressor - > stats . max_literals )
pCompressor - > stats . max_literals = nNumLiterals ;
pCompressor - > stats . total_literals + = nNumLiterals ;
pCompressor - > stats . literals_divisor + + ;
2019-06-07 23:15:40 +02:00
if ( nNumLiterals ! = 0 ) {
memcpy ( pOutData + nOutOffset , pInWindow + nInFirstLiteralOffset , nNumLiterals ) ;
nOutOffset + = nNumLiterals ;
nNumLiterals = 0 ;
}
2019-07-24 15:43:44 +02:00
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
int nCurSafeDist = ( i - nStartOffset ) - nOutOffset ;
if ( nCurSafeDist > = 0 & & pCompressor - > safe_dist < nCurSafeDist )
pCompressor - > safe_dist = nCurSafeDist ;
}
2019-06-07 23:15:40 +02:00
pCompressor - > num_commands + + ;
}
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
/* Emit EOD marker for raw block */
if ( ( nOutOffset + 4 ) > nMaxOutDataSize )
return - 1 ;
pOutData [ nOutOffset + + ] = 0 ;
pOutData [ nOutOffset + + ] = 238 ;
pOutData [ nOutOffset + + ] = 0 ;
pOutData [ nOutOffset + + ] = 0 ;
}
return nOutOffset ;
}
2019-07-01 09:25:19 +02:00
/**
 * Emit raw block of uncompressible data: a single literals-only command holding the whole input range,
 * followed by the 4-byte EOD marker
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nStartOffset current offset in input window (typically the number of previously compressed bytes)
 * @param nEndOffset offset to end finding matches at (typically the size of the total input window in bytes)
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of data written to the output buffer, or -1 if it doesn't fit in nMaxOutDataSize bytes
 */
static int lzsa_write_raw_uncompressed_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nStartOffset, const int nEndOffset, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nNumLiterals = nEndOffset - nStartOffset;
   const int nTokenLiteralsLen = (nNumLiterals >= LITERALS_RUN_LEN_V1) ? LITERALS_RUN_LEN_V1 : nNumLiterals;
   int nOutOffset = 0;

   /* Total size in bits. The EOD marker is 4 bytes, so it must be counted as 4 << 3 bits: adding a bare 4 here
    * would be discarded by the conversion to bytes below and let the marker be written past the end of the buffer */
   const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v1(nNumLiterals) + (nNumLiterals << 3) + (4 << 3) /* EOD marker */;

   if ((nOutOffset + (nCommandSize >> 3)) > nMaxOutDataSize)
      return -1;

   pCompressor->num_commands = 0;

   /* Token with the match length bits set to 0x0f, then the extra literals length bytes */
   pOutData[nOutOffset++] = (nTokenLiteralsLen << 4) | 0x0f;
   nOutOffset = lzsa_write_literals_varlen_v1(pOutData, nOutOffset, nNumLiterals);

   if (nNumLiterals != 0) {
      memcpy(pOutData + nOutOffset, pInWindow + nStartOffset, nNumLiterals);
      nOutOffset += nNumLiterals;
   }

   pCompressor->num_commands++;

   /* Emit EOD marker for raw block */
   pOutData[nOutOffset++] = 0;
   pOutData[nOutOffset++] = 238;
   pOutData[nOutOffset++] = 0;
   pOutData[nOutOffset++] = 0;

   return nOutOffset;
}
/**
 * Select the most optimal matches, reduce the token count if possible, and then emit a block of compressed LZSA1 data.
 * Two parses are built (one that doesn't break cost ties in favor of fewer tokens and, for inputs under 65536 bytes, one
 * that does); the second one is emitted when it isn't larger than the first.
 *
 * @param pCompressor compression context
 * @param pInWindow pointer to input data window (previously compressed bytes + bytes to compress)
 * @param nPreviousBlockSize number of previously compressed bytes (or 0 for none)
 * @param nInDataSize number of input bytes to compress
 * @param pOutData pointer to output buffer
 * @param nMaxOutDataSize maximum size of output buffer, in bytes
 *
 * @return size of compressed data in output buffer, or -1 if the data is uncompressible
 */
int lzsa_optimize_and_write_block_v1(lzsa_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
   const int nEndOffset = nPreviousBlockSize + nInDataSize;
   lzsa_match *pBaseParse = pCompressor->best_match - nPreviousBlockSize;
   lzsa_match *pChosenParse = pBaseParse;
   int nBaseCompressedSize;
   int nPass;
   int nResult;

   /* Compress optimally without breaking ties in favor of less tokens */
   lzsa_optimize_forward_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset, 0 /* reduce */);

   /* Keep reducing the command count until a pass changes nothing, for at most 20 passes */
   for (nPass = 0; nPass < 20; nPass++) {
      if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pBaseParse, nPreviousBlockSize, nEndOffset))
         break;
   }

   nBaseCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pBaseParse, nPreviousBlockSize, nEndOffset);

   if (nBaseCompressedSize > 0 && nInDataSize < 65536) {
      lzsa_match *pReducedParse = pCompressor->improved_match - nPreviousBlockSize;
      int nReducedCompressedSize;

      /* Compress optimally and do break ties in favor of less tokens */
      lzsa_optimize_forward_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset, 1 /* reduce */);

      for (nPass = 0; nPass < 20; nPass++) {
         if (!lzsa_optimize_command_count_v1(pCompressor, pInWindow, pReducedParse, nPreviousBlockSize, nEndOffset))
            break;
      }

      nReducedCompressedSize = lzsa_get_compressed_size_v1(pCompressor, pReducedParse, nPreviousBlockSize, nEndOffset);

      if (nReducedCompressedSize > 0 && nReducedCompressedSize <= nBaseCompressedSize) {
         /* Pick the parse with the reduced number of tokens as it didn't negatively affect the size */
         pChosenParse = pReducedParse;
      }
   }

   nResult = lzsa_write_block_v1(pCompressor, pChosenParse, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);

   /* In raw block mode, fall back to storing the data uncompressed when the compressed block couldn't be written */
   if (nResult < 0 && (pCompressor->flags & LZSA_FLAG_RAW_BLOCK)) {
      nResult = lzsa_write_raw_uncompressed_block_v1(pCompressor, pInWindow, nPreviousBlockSize, nEndOffset, pOutData, nMaxOutDataSize);
   }

   return nResult;
}