2019-06-07 21:15:40 +00:00
/*
2019-08-04 14:42:30 +00:00
* shrink_block_v2 . c - LZSA2 block compressor implementation
2019-06-07 21:15:40 +00:00
*
* Copyright ( C ) 2019 Emmanuel Marty
*
* This software is provided ' as - is ' , without any express or implied
* warranty . In no event will the authors be held liable for any damages
* arising from the use of this software .
*
* Permission is granted to anyone to use this software for any purpose ,
* including commercial applications , and to alter it and redistribute it
* freely , subject to the following restrictions :
*
* 1. The origin of this software must not be misrepresented ; you must not
* claim that you wrote the original software . If you use this software
* in a product , an acknowledgment in the product documentation would be
* appreciated but is not required .
* 2. Altered source versions must be plainly marked as such , and must not be
* misrepresented as being the original software .
* 3. This notice may not be removed or altered from any source distribution .
*/
/*
* Uses the libdivsufsort library Copyright ( c ) 2003 - 2008 Yuta Mori
*
* Inspired by LZ4 by Yann Collet . https : //github.com/lz4/lz4
* With help , ideas , optimizations and speed measurements by spke < zxintrospec @ gmail . com >
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet . https : //github.com/inikep/lizard
* Also with ideas from smallz4 by Stephan Brumme . https : //create.stephan-brumme.com/smallz4/
*
*/
# include <stdlib.h>
# include <string.h>
# include "lib.h"
# include "shrink_block_v2.h"
# include "format.h"
/**
* Write 4 - bit nibble to output ( compressed ) buffer
*
* @ param pOutData pointer to output buffer
* @ param nOutOffset current write index into output buffer
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
* @ param nCurNibbleOffset write index into output buffer , of current byte being filled with nibbles
* @ param nNibbleValue value to write ( 0. .15 )
2022-01-02 06:15:23 +00:00
*
* @ return updated write index into output buffer
2019-06-07 21:15:40 +00:00
*/
2022-01-02 06:15:23 +00:00
static int lzsa_write_nibble_v2 ( unsigned char * pOutData , int nOutOffset , const int nMaxOutDataSize , int * nCurNibbleOffset , const int nNibbleValue ) {
if ( nOutOffset > = 0 ) {
if ( ( * nCurNibbleOffset ) = = - 1 ) {
if ( nOutOffset > = nMaxOutDataSize ) return - 1 ;
( * nCurNibbleOffset ) = nOutOffset ;
pOutData [ nOutOffset + + ] = nNibbleValue < < 4 ;
}
else {
pOutData [ * nCurNibbleOffset ] | = ( nNibbleValue & 0x0f ) ;
( * nCurNibbleOffset ) = - 1 ;
}
2019-06-07 21:15:40 +00:00
}
return nOutOffset ;
}
/**
* Get the number of extra bits required to represent a literals length
*
* @ param nLength literals length
*
* @ return number of extra bits required
*/
static inline int lzsa_get_literals_varlen_size_v2 ( const int nLength ) {
if ( nLength < LITERALS_RUN_LEN_V2 ) {
return 0 ;
}
else {
if ( nLength < ( LITERALS_RUN_LEN_V2 + 15 ) ) {
return 4 ;
}
else {
if ( nLength < 256 )
2022-04-19 07:18:09 +00:00
return 4 + 8 ;
2019-06-07 21:15:40 +00:00
else {
2022-04-19 07:18:09 +00:00
return 4 + 24 ;
2019-06-07 21:15:40 +00:00
}
}
}
}
/**
* Write extra literals length bytes to output ( compressed ) buffer . The caller must first check that there is enough
* room to write the bytes .
*
* @ param pOutData pointer to output buffer
* @ param nOutOffset current write index into output buffer
2020-07-10 06:55:45 +00:00
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
* @ param nCurNibbleOffset write index into output buffer , of current byte being filled with nibbles
2019-06-07 21:15:40 +00:00
* @ param nLength literals length
2022-01-02 06:15:23 +00:00
*
* @ return updated write index into output buffer
2019-06-07 21:15:40 +00:00
*/
2022-01-02 06:15:23 +00:00
static inline int lzsa_write_literals_varlen_v2 ( unsigned char * pOutData , int nOutOffset , const int nMaxOutDataSize , int * nCurNibbleOffset , const int nLength ) {
2019-06-07 21:15:40 +00:00
if ( nLength > = LITERALS_RUN_LEN_V2 ) {
if ( nLength < ( LITERALS_RUN_LEN_V2 + 15 ) ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , nCurNibbleOffset , nLength - LITERALS_RUN_LEN_V2 ) ;
2019-06-07 21:15:40 +00:00
}
else {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , nCurNibbleOffset , 15 ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
if ( nLength < 256 )
pOutData [ nOutOffset + + ] = nLength - 18 ;
else {
pOutData [ nOutOffset + + ] = 239 ;
pOutData [ nOutOffset + + ] = nLength & 0xff ;
pOutData [ nOutOffset + + ] = ( nLength > > 8 ) & 0xff ;
}
}
}
return nOutOffset ;
}
/**
* Get the number of extra bits required to represent an encoded match length
*
* @ param nLength encoded match length ( actual match length - MIN_MATCH_SIZE_V2 )
*
* @ return number of extra bits required
*/
static inline int lzsa_get_match_varlen_size_v2 ( const int nLength ) {
if ( nLength < MATCH_RUN_LEN_V2 ) {
return 0 ;
}
else {
if ( nLength < ( MATCH_RUN_LEN_V2 + 15 ) )
return 4 ;
else {
if ( ( nLength + MIN_MATCH_SIZE_V2 ) < 256 )
2022-04-19 07:18:09 +00:00
return 4 + 8 ;
2019-06-07 21:15:40 +00:00
else {
return 4 + 24 ;
}
}
}
}
/**
* Write extra encoded match length bytes to output ( compressed ) buffer . The caller must first check that there is enough
* room to write the bytes .
*
* @ param pOutData pointer to output buffer
* @ param nOutOffset current write index into output buffer
2020-07-10 06:55:45 +00:00
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
* @ param nCurNibbleOffset write index into output buffer , of current byte being filled with nibbles
2019-06-07 21:15:40 +00:00
* @ param nLength encoded match length ( actual match length - MIN_MATCH_SIZE_V2 )
2022-01-02 06:15:23 +00:00
*
* @ return updated write index into output buffer
2019-06-07 21:15:40 +00:00
*/
2022-01-02 06:15:23 +00:00
static inline int lzsa_write_match_varlen_v2 ( unsigned char * pOutData , int nOutOffset , const int nMaxOutDataSize , int * nCurNibbleOffset , const int nLength ) {
2019-06-07 21:15:40 +00:00
if ( nLength > = MATCH_RUN_LEN_V2 ) {
if ( nLength < ( MATCH_RUN_LEN_V2 + 15 ) ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , nCurNibbleOffset , nLength - MATCH_RUN_LEN_V2 ) ;
2019-06-07 21:15:40 +00:00
}
else {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , nCurNibbleOffset , 15 ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
if ( ( nLength + MIN_MATCH_SIZE_V2 ) < 256 )
pOutData [ nOutOffset + + ] = nLength + MIN_MATCH_SIZE_V2 - 24 ;
else {
pOutData [ nOutOffset + + ] = 233 ;
pOutData [ nOutOffset + + ] = ( nLength + MIN_MATCH_SIZE_V2 ) & 0xff ;
pOutData [ nOutOffset + + ] = ( ( nLength + MIN_MATCH_SIZE_V2 ) > > 8 ) & 0xff ;
}
}
}
return nOutOffset ;
}
2019-10-27 13:55:39 +00:00
/**
* Insert forward rep candidate
*
* @ param pCompressor compression context
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
* @ param i input data window position whose matches are being considered
* @ param nMatchOffset match offset to use as rep candidate
2019-11-11 23:30:24 +00:00
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
2019-10-27 13:55:39 +00:00
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
2019-11-11 23:30:24 +00:00
* @ param nDepth current insertion depth
2019-10-27 13:55:39 +00:00
*/
2022-01-02 06:15:23 +00:00
static void lzsa_insert_forward_match_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int i , const int nMatchOffset , const int nStartOffset , const int nEndOffset , const int nDepth ) {
2022-05-04 09:32:21 +00:00
const lzsa_arrival * arrival = pCompressor - > arrival + ( ( i - nStartOffset ) < < ARRIVALS_PER_POSITION_SHIFT_V2 ) ;
2023-01-30 12:21:43 +00:00
const int * rle_len = ( const int * ) pCompressor - > intervals /* reuse */ ;
2020-08-18 07:13:54 +00:00
lzsa_match * visited = ( ( lzsa_match * ) pCompressor - > pos_data ) - nStartOffset /* reuse */ ;
2019-10-27 13:55:39 +00:00
int j ;
2022-04-02 06:49:26 +00:00
for ( j = 0 ; j < NARRIVALS_PER_POSITION_V2_BIG & & arrival [ j ] . from_slot ; j + + ) {
2021-10-05 10:11:39 +00:00
const int nRepOffset = arrival [ j ] . rep_offset ;
2019-10-27 13:55:39 +00:00
2022-01-02 06:15:23 +00:00
if ( nMatchOffset ! = nRepOffset ) {
2021-10-05 10:11:39 +00:00
const int nRepLen = arrival [ j ] . rep_len ;
2022-01-02 06:15:23 +00:00
const int nRepPos = arrival [ j ] . rep_pos ;
2019-10-27 13:55:39 +00:00
2022-04-02 06:49:26 +00:00
if ( nRepPos > = nStartOffset & &
2022-01-02 06:15:23 +00:00
( nRepPos + nRepLen ) < = nEndOffset ) {
2020-08-18 07:13:54 +00:00
2022-01-02 06:15:23 +00:00
if ( visited [ nRepPos ] . offset ! = nMatchOffset | | visited [ nRepPos ] . length > nRepLen ) {
visited [ nRepPos ] . length = 0 ;
2022-04-02 06:49:26 +00:00
visited [ nRepPos ] . offset = nMatchOffset ;
2020-07-13 17:34:07 +00:00
2022-04-02 06:49:26 +00:00
lzsa_match * fwd_match = pCompressor - > match + ( ( nRepPos - nStartOffset ) < < MATCHES_PER_INDEX_SHIFT_V2 ) ;
if ( fwd_match [ NMATCHES_PER_INDEX_V2 - 1 ] . length = = 0 ) {
if ( nRepPos > = nMatchOffset ) {
const unsigned char * pInWindowStart = pInWindow + nRepPos ;
if ( ! memcmp ( pInWindowStart , pInWindowStart - nMatchOffset , 2 ) ) {
2022-04-03 18:23:55 +00:00
const int nLen0 = rle_len [ nRepPos - nMatchOffset ] ;
const int nLen1 = rle_len [ nRepPos ] ;
const int nMinLen = ( nLen0 < nLen1 ) ? nLen0 : nLen1 ;
2022-04-02 06:49:26 +00:00
2022-04-03 18:23:55 +00:00
if ( nMinLen > = nRepLen | | ! memcmp ( pInWindowStart + nMinLen , pInWindowStart + nMinLen - nMatchOffset , nRepLen - nMinLen ) ) {
2022-04-19 07:18:09 +00:00
int r ;
2022-04-02 06:49:26 +00:00
2022-04-19 07:18:09 +00:00
for ( r = 0 ; fwd_match [ r ] . length ; r + + ) {
if ( fwd_match [ r ] . offset = = nMatchOffset ) {
break ;
}
}
2020-07-13 17:34:07 +00:00
2022-04-19 07:18:09 +00:00
if ( fwd_match [ r ] . length = = 0 ) {
if ( nRepLen > = MIN_MATCH_SIZE_V2 ) {
if ( nRepOffset ) {
2022-04-02 06:49:26 +00:00
int nMaxRepLen = nEndOffset - nRepPos ;
if ( nMaxRepLen > LCP_MAX )
nMaxRepLen = LCP_MAX ;
const int nCurRepLen = ( nMinLen > nRepLen ) ? nMinLen : nRepLen ;
const unsigned char * pInWindowMax = pInWindowStart + nMaxRepLen ;
const unsigned char * pInWindowAtRepPos = pInWindowStart + nCurRepLen ;
if ( pInWindowAtRepPos > pInWindowMax )
pInWindowAtRepPos = pInWindowMax ;
while ( ( pInWindowAtRepPos + 8 ) < pInWindowMax & & ! memcmp ( pInWindowAtRepPos , pInWindowAtRepPos - nMatchOffset , 8 ) )
pInWindowAtRepPos + = 8 ;
while ( ( pInWindowAtRepPos + 4 ) < pInWindowMax & & ! memcmp ( pInWindowAtRepPos , pInWindowAtRepPos - nMatchOffset , 4 ) )
pInWindowAtRepPos + = 4 ;
while ( pInWindowAtRepPos < pInWindowMax & & pInWindowAtRepPos [ 0 ] = = pInWindowAtRepPos [ - nMatchOffset ] )
pInWindowAtRepPos + + ;
2022-10-15 10:10:41 +00:00
fwd_match [ r ] . length = ( const unsigned short ) ( pInWindowAtRepPos - pInWindowStart ) ;
2022-04-02 06:49:26 +00:00
fwd_match [ r ] . offset = nMatchOffset ;
if ( nDepth < 9 )
lzsa_insert_forward_match_v2 ( pCompressor , pInWindow , nRepPos , nMatchOffset , nStartOffset , nEndOffset , nDepth + 1 ) ;
}
2022-01-02 06:15:23 +00:00
}
2021-10-10 05:52:03 +00:00
}
2022-04-03 18:23:55 +00:00
}
else {
visited [ nRepPos ] . length = nRepLen ;
2020-07-13 17:34:07 +00:00
}
}
2020-06-18 15:47:31 +00:00
}
}
2019-10-27 13:55:39 +00:00
}
}
}
}
}
2019-06-07 21:15:40 +00:00
/**
2019-08-26 22:51:34 +00:00
* Attempt to pick optimal matches using a forward arrivals parser , so as to produce the smallest possible output that decompresses to the same input
2019-06-07 21:15:40 +00:00
*
* @ param pCompressor compression context
2019-10-27 13:55:39 +00:00
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
2019-06-07 21:15:40 +00:00
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
2019-12-15 22:36:51 +00:00
* @ param nReduce non - zero to reduce the number of tokens when the path costs are equal , zero not to
2019-10-22 10:37:46 +00:00
* @ param nInsertForwardReps non - zero to insert forward repmatch candidates , zero to use the previously inserted candidates
2020-07-14 21:50:44 +00:00
* @ param nArrivalsPerPosition number of arrivals to record per input buffer position
2019-06-07 21:15:40 +00:00
*/
2022-04-19 07:18:09 +00:00
static void lzsa_optimize_forward_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int nStartOffset , const int nEndOffset , const int nReduce , const int nInsertForwardReps , const int nArrivalsPerPosition ) {
2022-05-04 09:32:21 +00:00
lzsa_arrival * arrival = pCompressor - > arrival - ( nStartOffset < < ARRIVALS_PER_POSITION_SHIFT_V2 ) ;
2022-04-02 06:49:26 +00:00
const int * rle_len = ( const int * ) pCompressor - > intervals /* reuse */ ;
2020-08-18 07:13:54 +00:00
lzsa_match * visited = ( ( lzsa_match * ) pCompressor - > pos_data ) - nStartOffset /* reuse */ ;
2022-10-19 08:39:40 +00:00
unsigned char * nRepSlotHandledMask = pCompressor - > rep_slot_handled_mask ;
unsigned char * nRepLenHandledMask = pCompressor - > rep_len_handled_mask ;
2020-07-10 06:55:45 +00:00
const int nModeSwitchPenalty = ( pCompressor - > flags & LZSA_FLAG_FAVOR_RATIO ) ? 0 : MODESWITCH_PENALTY ;
2019-09-23 18:24:50 +00:00
const int nMinMatchSize = pCompressor - > min_match_size ;
2019-10-11 07:05:58 +00:00
const int nDisableScore = nReduce ? 0 : ( 2 * BLOCK_SIZE ) ;
2020-07-29 11:01:24 +00:00
const int nMaxRepInsertedLen = nReduce ? LEAVE_ALONE_MATCH_SIZE : 0 ;
2020-07-14 21:50:44 +00:00
const int nLeaveAloneMatchSize = ( nArrivalsPerPosition = = NARRIVALS_PER_POSITION_V2_SMALL ) ? LEAVE_ALONE_MATCH_SIZE_SMALL : LEAVE_ALONE_MATCH_SIZE ;
2022-01-02 06:15:23 +00:00
int i ;
2019-08-26 22:51:34 +00:00
2019-11-11 23:30:24 +00:00
if ( ( nEndOffset - nStartOffset ) > BLOCK_SIZE ) return ;
2022-05-04 09:32:21 +00:00
for ( i = ( nStartOffset < < ARRIVALS_PER_POSITION_SHIFT_V2 ) ; i ! = ( ( nEndOffset + 1 ) < < ARRIVALS_PER_POSITION_SHIFT_V2 ) ; i + = NARRIVALS_PER_POSITION_V2_MAX ) {
2022-04-19 07:18:09 +00:00
lzsa_arrival * cur_arrival = & arrival [ i ] ;
2022-01-02 06:15:23 +00:00
int j ;
2019-08-26 22:51:34 +00:00
2022-04-19 07:18:09 +00:00
memset ( cur_arrival , 0 , sizeof ( lzsa_arrival ) * NARRIVALS_PER_POSITION_V2_MAX ) ;
2022-01-02 06:15:23 +00:00
for ( j = 0 ; j < NARRIVALS_PER_POSITION_V2_MAX ; j + + )
2022-04-19 07:18:09 +00:00
cur_arrival [ j ] . cost = 0x40000000 ;
2019-10-09 14:07:29 +00:00
}
2022-05-04 09:32:21 +00:00
arrival [ nStartOffset < < ARRIVALS_PER_POSITION_SHIFT_V2 ] . cost = 0 ;
arrival [ nStartOffset < < ARRIVALS_PER_POSITION_SHIFT_V2 ] . from_slot = - 1 ;
2019-08-26 22:51:34 +00:00
2020-08-18 07:13:54 +00:00
if ( nInsertForwardReps ) {
memset ( visited + nStartOffset , 0 , ( nEndOffset - nStartOffset ) * sizeof ( lzsa_match ) ) ;
}
2019-11-26 19:48:13 +00:00
for ( i = nStartOffset ; i ! = nEndOffset ; i + + ) {
2022-05-04 09:32:21 +00:00
lzsa_arrival * cur_arrival = & arrival [ i < < ARRIVALS_PER_POSITION_SHIFT_V2 ] ;
2022-04-02 06:49:26 +00:00
lzsa_arrival * pDestLiteralSlots = & cur_arrival [ NARRIVALS_PER_POSITION_V2_MAX ] ;
2022-01-02 06:15:23 +00:00
int j , m ;
2019-08-26 22:51:34 +00:00
2020-07-14 21:50:44 +00:00
for ( j = 0 ; j < nArrivalsPerPosition & & cur_arrival [ j ] . from_slot ; j + + ) {
2022-04-05 05:13:03 +00:00
const int nPrevCost = cur_arrival [ j ] . cost ;
2019-08-26 22:51:34 +00:00
int nCodingChoiceCost = nPrevCost + 8 /* literal */ ;
2022-01-02 06:15:23 +00:00
const int nScore = cur_arrival [ j ] . score + 1 - nDisableScore ;
const int nNumLiterals = cur_arrival [ j ] . num_literals + 1 ;
2022-04-02 06:49:26 +00:00
const int nRepOffset = cur_arrival [ j ] . rep_offset ;
2019-08-26 22:51:34 +00:00
2022-04-20 12:06:19 +00:00
switch ( nNumLiterals ) {
case 1 :
2022-01-02 06:15:23 +00:00
nCodingChoiceCost + = nModeSwitchPenalty ;
2022-04-20 12:06:19 +00:00
break ;
case LITERALS_RUN_LEN_V2 :
2019-08-26 22:51:34 +00:00
nCodingChoiceCost + = 4 ;
2022-04-20 12:06:19 +00:00
break ;
case LITERALS_RUN_LEN_V2 + 15 :
nCodingChoiceCost + = 8 ;
break ;
case 256 :
nCodingChoiceCost + = 16 ;
break ;
default :
break ;
2019-08-26 22:51:34 +00:00
}
2021-10-05 10:11:39 +00:00
if ( nCodingChoiceCost < pDestLiteralSlots [ nArrivalsPerPosition - 1 ] . cost | |
2022-04-19 07:18:09 +00:00
( nCodingChoiceCost = = pDestLiteralSlots [ nArrivalsPerPosition - 1 ] . cost & & nScore < pDestLiteralSlots [ nArrivalsPerPosition - 1 ] . score & &
2022-04-02 06:49:26 +00:00
nRepOffset ! = pDestLiteralSlots [ nArrivalsPerPosition - 1 ] . rep_offset ) ) {
2022-01-02 06:15:23 +00:00
int exists = 0 , n ;
2020-05-28 16:38:19 +00:00
2019-10-08 14:23:33 +00:00
for ( n = 0 ;
2021-10-12 19:02:16 +00:00
pDestLiteralSlots [ n ] . cost < nCodingChoiceCost ;
2019-10-08 14:23:33 +00:00
n + + ) {
2021-10-05 10:11:39 +00:00
if ( pDestLiteralSlots [ n ] . rep_offset = = nRepOffset ) {
2019-10-08 14:23:33 +00:00
exists = 1 ;
break ;
}
2019-09-18 22:11:26 +00:00
}
2019-10-10 12:42:08 +00:00
if ( ! exists ) {
2020-07-24 15:14:01 +00:00
for ( ;
2022-04-19 07:18:09 +00:00
n < nArrivalsPerPosition & & pDestLiteralSlots [ n ] . cost = = nCodingChoiceCost & & nScore > = pDestLiteralSlots [ n ] . score ;
2020-07-24 15:14:01 +00:00
n + + ) {
2021-10-05 10:11:39 +00:00
if ( pDestLiteralSlots [ n ] . rep_offset = = nRepOffset ) {
2020-05-28 16:38:19 +00:00
exists = 1 ;
break ;
}
}
2019-08-26 22:51:34 +00:00
2020-05-28 16:38:19 +00:00
if ( ! exists ) {
2020-07-24 15:14:01 +00:00
if ( n < nArrivalsPerPosition ) {
2022-04-02 06:49:26 +00:00
int z ;
2019-12-09 08:54:56 +00:00
2022-04-02 06:49:26 +00:00
for ( z = n ;
z < nArrivalsPerPosition - 1 & & pDestLiteralSlots [ z ] . cost = = nCodingChoiceCost ;
z + + ) {
if ( pDestLiteralSlots [ z ] . rep_offset = = nRepOffset ) {
2020-07-24 15:14:01 +00:00
exists = 1 ;
break ;
}
}
2020-05-28 16:38:19 +00:00
2020-07-24 15:14:01 +00:00
if ( ! exists ) {
2022-04-02 06:49:26 +00:00
for ( ; z < nArrivalsPerPosition - 1 & & pDestLiteralSlots [ z ] . from_slot ; z + + ) {
2021-10-05 10:11:39 +00:00
if ( pDestLiteralSlots [ z ] . rep_offset = = nRepOffset )
2020-07-24 15:14:01 +00:00
break ;
2019-12-09 08:54:56 +00:00
}
2021-10-05 10:11:39 +00:00
memmove ( & pDestLiteralSlots [ n + 1 ] ,
& pDestLiteralSlots [ n ] ,
2020-07-24 15:14:01 +00:00
sizeof ( lzsa_arrival ) * ( z - n ) ) ;
2021-10-05 10:11:39 +00:00
lzsa_arrival * pDestArrival = & pDestLiteralSlots [ n ] ;
2020-05-28 16:38:19 +00:00
pDestArrival - > cost = nCodingChoiceCost ;
2022-04-02 06:49:26 +00:00
pDestArrival - > rep_offset = nRepOffset ;
2020-05-28 16:38:19 +00:00
pDestArrival - > from_slot = j + 1 ;
2022-05-04 09:32:21 +00:00
pDestArrival - > from_pos = i - nStartOffset ;
2022-04-02 06:49:26 +00:00
pDestArrival - > rep_len = cur_arrival [ j ] . rep_len ;
2020-05-28 16:38:19 +00:00
pDestArrival - > match_len = 0 ;
pDestArrival - > num_literals = nNumLiterals ;
2022-05-04 09:32:21 +00:00
pDestArrival - > rep_pos = cur_arrival [ j ] . rep_pos ;
2022-01-02 06:15:23 +00:00
pDestArrival - > score = nScore + nDisableScore ;
2019-10-10 12:42:08 +00:00
}
}
2019-10-08 14:23:33 +00:00
}
2019-08-26 22:51:34 +00:00
}
}
}
2022-01-02 06:15:23 +00:00
const lzsa_match * match = pCompressor - > match + ( ( i - nStartOffset ) < < MATCHES_PER_INDEX_SHIFT_V2 ) ;
2021-10-05 10:11:39 +00:00
const int nNumArrivalsForThisPos = j ;
int nMinOverallRepLen = 0 , nMaxOverallRepLen = 0 ;
2019-09-18 22:11:26 +00:00
2021-10-12 13:52:11 +00:00
int nRepMatchArrivalIdxAndLen [ ( NARRIVALS_PER_POSITION_V2_MAX * 2 ) + 1 ] ;
2020-12-17 13:47:25 +00:00
int nNumRepMatchArrivals = 0 ;
2019-12-15 17:04:16 +00:00
2021-10-12 19:02:16 +00:00
if ( ( i + MIN_MATCH_SIZE_V2 ) < = nEndOffset ) {
int nMaxRepLenForPos = nEndOffset - i ;
if ( nMaxRepLenForPos > LCP_MAX )
nMaxRepLenForPos = LCP_MAX ;
const unsigned char * pInWindowStart = pInWindow + i ;
const unsigned char * pInWindowMax = pInWindowStart + nMaxRepLenForPos ;
for ( j = 0 ; j < nNumArrivalsForThisPos ; j + + ) {
const int nRepOffset = cur_arrival [ j ] . rep_offset ;
2022-01-02 06:15:23 +00:00
if ( i > = nRepOffset ) {
2022-04-28 06:02:27 +00:00
if ( ! memcmp ( pInWindowStart , pInWindowStart - nRepOffset , MIN_MATCH_SIZE_V2 ) ) {
2022-01-02 06:15:23 +00:00
if ( nRepOffset ) {
2021-10-12 19:02:16 +00:00
const unsigned char * pInWindowAtPos ;
const int nLen0 = rle_len [ i - nRepOffset ] ;
const int nLen1 = rle_len [ i ] ;
int nMinLen = ( nLen0 < nLen1 ) ? nLen0 : nLen1 ;
if ( nMinLen > nMaxRepLenForPos )
nMinLen = nMaxRepLenForPos ;
pInWindowAtPos = pInWindowStart + nMinLen ;
2023-01-30 12:21:43 +00:00
while ( ( pInWindowAtPos + 8 ) < pInWindowMax & & ! memcmp ( pInWindowAtPos , pInWindowAtPos - nRepOffset , 8 ) )
2021-10-12 19:02:16 +00:00
pInWindowAtPos + = 8 ;
2023-01-30 12:21:43 +00:00
while ( ( pInWindowAtPos + 4 ) < pInWindowMax & & ! memcmp ( pInWindowAtPos , pInWindowAtPos - nRepOffset , 4 ) )
2021-10-12 19:02:16 +00:00
pInWindowAtPos + = 4 ;
2023-01-30 12:21:43 +00:00
while ( pInWindowAtPos < pInWindowMax & & pInWindowAtPos [ 0 ] = = pInWindowAtPos [ - nRepOffset ] )
2021-10-12 19:02:16 +00:00
pInWindowAtPos + + ;
2022-04-02 06:49:26 +00:00
const int nCurRepLen = ( const int ) ( pInWindowAtPos - pInWindowStart ) ;
2021-10-12 19:02:16 +00:00
if ( nMaxOverallRepLen < nCurRepLen )
nMaxOverallRepLen = nCurRepLen ;
nRepMatchArrivalIdxAndLen [ nNumRepMatchArrivals + + ] = j ;
nRepMatchArrivalIdxAndLen [ nNumRepMatchArrivals + + ] = nCurRepLen ;
}
2020-07-26 08:07:03 +00:00
}
}
}
}
2020-12-17 13:47:25 +00:00
nRepMatchArrivalIdxAndLen [ nNumRepMatchArrivals ] = - 1 ;
2020-07-26 08:07:03 +00:00
2020-08-18 07:13:54 +00:00
if ( ! nReduce ) {
2022-10-19 08:39:40 +00:00
memset ( nRepSlotHandledMask , 0 , nArrivalsPerPosition * ( ( LCP_MAX + 1 ) / 8 ) * sizeof ( unsigned char ) ) ;
2020-08-18 07:13:54 +00:00
}
2022-10-19 08:39:40 +00:00
memset ( nRepLenHandledMask , 0 , ( ( LCP_MAX + 1 ) / 8 ) * sizeof ( unsigned char ) ) ;
2020-08-18 07:13:54 +00:00
2019-10-29 09:45:57 +00:00
for ( m = 0 ; m < NMATCHES_PER_INDEX_V2 & & match [ m ] . length ; m + + ) {
2019-12-15 17:04:16 +00:00
int nMatchLen = match [ m ] . length & 0x7fff ;
2022-04-19 07:18:09 +00:00
const int nMatchOffset = match [ m ] . offset ;
2023-01-30 12:21:43 +00:00
int nNoRepmatchOffsetCost = 0 , nNoRepmatchScore = 0 ;
2020-07-26 08:07:03 +00:00
int nStartingMatchLen , k ;
2019-08-26 22:51:34 +00:00
2020-07-13 17:34:07 +00:00
if ( ( i + nMatchLen ) > nEndOffset )
nMatchLen = nEndOffset - i ;
2019-08-26 22:51:34 +00:00
2019-10-27 13:55:39 +00:00
if ( nInsertForwardReps )
2020-07-24 15:14:01 +00:00
lzsa_insert_forward_match_v2 ( pCompressor , pInWindow , i , nMatchOffset , nStartOffset , nEndOffset , 0 ) ;
2019-10-22 10:37:46 +00:00
2020-10-13 14:10:18 +00:00
int nNonRepMatchArrivalIdx = - 1 ;
for ( j = 0 ; j < nNumArrivalsForThisPos ; j + + ) {
2022-10-16 16:39:24 +00:00
if ( nMatchOffset ! = cur_arrival [ j ] . rep_offset ) {
2022-04-05 05:13:03 +00:00
const int nPrevCost = cur_arrival [ j ] . cost ;
2022-04-19 07:18:09 +00:00
const int nScorePenalty = 3 + ( match [ m ] . length > > 15 ) ;
2021-10-05 10:11:39 +00:00
2022-01-02 06:15:23 +00:00
nNoRepmatchOffsetCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ ;
2021-10-05 10:11:39 +00:00
if ( ! cur_arrival [ j ] . num_literals )
nNoRepmatchOffsetCost + = nModeSwitchPenalty ;
2022-01-02 06:15:23 +00:00
nNoRepmatchOffsetCost + = ( nMatchOffset < = 32 ) ? 4 : ( ( nMatchOffset < = 512 ) ? 8 : ( ( nMatchOffset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
nNoRepmatchScore = cur_arrival [ j ] . score + nScorePenalty - nDisableScore ;
2021-10-05 10:11:39 +00:00
2020-10-13 14:10:18 +00:00
nNonRepMatchArrivalIdx = j ;
break ;
}
}
2020-07-06 10:47:56 +00:00
int nMatchLenCost ;
2019-12-15 22:36:51 +00:00
if ( nMatchLen > = nLeaveAloneMatchSize ) {
2019-10-08 07:39:18 +00:00
nStartingMatchLen = nMatchLen ;
2020-07-24 15:14:01 +00:00
nMatchLenCost = 4 + 24 + 8 /* token */ ;
2019-12-15 17:04:16 +00:00
}
else {
2019-10-08 07:39:18 +00:00
nStartingMatchLen = nMinMatchSize ;
2022-04-19 07:18:09 +00:00
nMatchLenCost = /* 0 + */ 8 /* token */ ;
2019-12-15 17:04:16 +00:00
}
2019-10-08 07:39:18 +00:00
for ( k = nStartingMatchLen ; k < = nMatchLen ; k + + ) {
2019-12-15 17:04:16 +00:00
if ( k = = ( MATCH_RUN_LEN_V2 + MIN_MATCH_SIZE_V2 ) ) {
2020-07-24 15:14:01 +00:00
nMatchLenCost = 4 + 8 /* token */ ;
2019-12-15 17:04:16 +00:00
}
else {
if ( k = = ( MATCH_RUN_LEN_V2 + 15 + MIN_MATCH_SIZE_V2 ) )
2020-07-24 15:14:01 +00:00
nMatchLenCost = 4 + 8 + 8 /* token */ ;
2019-12-15 17:04:16 +00:00
else {
if ( k = = 256 )
2020-07-24 15:14:01 +00:00
nMatchLenCost = 4 + 24 + 8 /* token */ ;
2019-12-15 17:04:16 +00:00
}
}
2022-05-04 09:32:21 +00:00
lzsa_arrival * pDestSlots = & cur_arrival [ k < < ARRIVALS_PER_POSITION_SHIFT_V2 ] ;
2020-07-06 10:47:56 +00:00
2020-11-06 15:26:37 +00:00
/* Insert non-repmatch candidate */
2019-10-08 07:39:18 +00:00
2020-10-13 14:10:18 +00:00
if ( nNonRepMatchArrivalIdx > = 0 ) {
2022-01-02 06:15:23 +00:00
const int nCodingChoiceCost = nMatchLenCost + nNoRepmatchOffsetCost ;
2019-11-20 14:40:11 +00:00
2020-10-13 14:10:18 +00:00
if ( nCodingChoiceCost < pDestSlots [ nArrivalsPerPosition - 2 ] . cost | |
2022-04-19 07:18:09 +00:00
( nCodingChoiceCost = = pDestSlots [ nArrivalsPerPosition - 2 ] . cost & & nNoRepmatchScore < pDestSlots [ nArrivalsPerPosition - 2 ] . score & &
2022-04-02 06:49:26 +00:00
( nCodingChoiceCost ! = pDestSlots [ nArrivalsPerPosition - 1 ] . cost | | nMatchOffset ! = pDestSlots [ nArrivalsPerPosition - 1 ] . rep_offset ) ) ) {
2022-01-02 06:15:23 +00:00
int exists = 0 , n ;
2019-10-08 18:26:21 +00:00
2020-10-13 14:10:18 +00:00
for ( n = 0 ;
2021-10-12 19:02:16 +00:00
pDestSlots [ n ] . cost < nCodingChoiceCost ;
2020-10-13 14:10:18 +00:00
n + + ) {
if ( pDestSlots [ n ] . rep_offset = = nMatchOffset ) {
exists = 1 ;
break ;
}
}
if ( ! exists ) {
for ( ;
2022-04-19 07:18:09 +00:00
n < nArrivalsPerPosition & & pDestSlots [ n ] . cost = = nCodingChoiceCost & & nNoRepmatchScore > = pDestSlots [ n ] . score ;
2020-07-06 10:47:56 +00:00
n + + ) {
if ( pDestSlots [ n ] . rep_offset = = nMatchOffset ) {
exists = 1 ;
break ;
}
}
2019-09-18 22:11:26 +00:00
2020-07-06 10:47:56 +00:00
if ( ! exists ) {
2020-10-13 14:10:18 +00:00
if ( n < nArrivalsPerPosition - 1 ) {
2022-04-02 06:49:26 +00:00
int z ;
2020-10-13 14:10:18 +00:00
2022-04-02 06:49:26 +00:00
for ( z = n ;
z < nArrivalsPerPosition - 1 & & pDestSlots [ z ] . cost = = nCodingChoiceCost ;
z + + ) {
if ( pDestSlots [ z ] . rep_offset = = nMatchOffset ) {
if ( ! nInsertForwardReps | | pDestSlots [ nArrivalsPerPosition - 1 ] . from_slot | | pDestSlots [ z ] . rep_pos > = i ) {
2021-10-10 05:52:03 +00:00
exists = 1 ;
}
2022-04-02 06:49:26 +00:00
break ;
2020-10-13 14:10:18 +00:00
}
2019-11-26 10:58:34 +00:00
}
2019-10-08 18:26:21 +00:00
2020-10-13 14:10:18 +00:00
if ( ! exists ) {
2022-04-02 06:49:26 +00:00
for ( ; z < nArrivalsPerPosition - 1 & & pDestSlots [ z ] . from_slot ; z + + ) {
2020-10-13 14:10:18 +00:00
if ( pDestSlots [ z ] . rep_offset = = nMatchOffset )
2020-07-24 15:14:01 +00:00
break ;
}
2019-12-09 08:54:56 +00:00
2020-10-13 14:10:18 +00:00
memmove ( & pDestSlots [ n + 1 ] ,
& pDestSlots [ n ] ,
sizeof ( lzsa_arrival ) * ( z - n ) ) ;
lzsa_arrival * pDestArrival = & pDestSlots [ n ] ;
pDestArrival - > cost = nCodingChoiceCost ;
2022-04-02 06:49:26 +00:00
pDestArrival - > rep_offset = nMatchOffset ;
2020-11-02 14:24:08 +00:00
pDestArrival - > from_slot = nNonRepMatchArrivalIdx + 1 ;
2022-05-04 09:32:21 +00:00
pDestArrival - > from_pos = i - nStartOffset ;
2022-04-02 06:49:26 +00:00
pDestArrival - > rep_len = k ;
2020-10-13 14:10:18 +00:00
pDestArrival - > match_len = k ;
pDestArrival - > num_literals = 0 ;
2022-05-04 09:32:21 +00:00
pDestArrival - > rep_pos = i ;
2022-01-02 06:15:23 +00:00
pDestArrival - > score = nNoRepmatchScore + nDisableScore ;
2021-10-10 05:52:03 +00:00
nRepLenHandledMask [ k > > 3 ] & = ~ ( ( 1 ^ nReduce ) < < ( k & 7 ) ) ;
2019-10-08 14:23:33 +00:00
}
}
2019-09-20 10:24:27 +00:00
}
}
2020-07-06 10:47:56 +00:00
}
}
2019-09-20 10:24:27 +00:00
2020-07-06 10:47:56 +00:00
/* Insert repmatch candidates */
2019-10-08 14:23:33 +00:00
2020-12-16 09:03:18 +00:00
if ( k > nMinOverallRepLen & & k < = nMaxOverallRepLen & & ( nRepLenHandledMask [ k > > 3 ] & ( 1 < < ( k & 7 ) ) ) = = 0 ) {
2020-12-17 13:47:25 +00:00
int nCurRepMatchArrival ;
2020-12-16 09:03:18 +00:00
nRepLenHandledMask [ k > > 3 ] | = 1 < < ( k & 7 ) ;
2020-12-17 13:47:25 +00:00
for ( nCurRepMatchArrival = 0 ; ( j = nRepMatchArrivalIdxAndLen [ nCurRepMatchArrival ] ) > = 0 ; nCurRepMatchArrival + = 2 ) {
2021-10-10 05:52:03 +00:00
if ( nRepMatchArrivalIdxAndLen [ nCurRepMatchArrival + 1 ] > = k ) {
2022-01-02 06:15:23 +00:00
const int nMaskOffset = ( j < < 7 ) + ( k > > 3 ) ;
2019-09-20 10:24:27 +00:00
2021-10-10 05:52:03 +00:00
if ( nReduce | | ! ( nRepSlotHandledMask [ nMaskOffset ] & ( 1 < < ( k & 7 ) ) ) ) {
2022-01-02 06:15:23 +00:00
const int nScore = cur_arrival [ j ] . score + 2 - nDisableScore ;
2022-04-02 06:49:26 +00:00
const int nRepOffset = cur_arrival [ j ] . rep_offset ;
2021-10-10 05:52:03 +00:00
2022-04-02 06:49:26 +00:00
if ( nRepOffset ! = pDestSlots [ nArrivalsPerPosition - 1 ] . rep_offset ) {
2022-04-05 05:13:03 +00:00
const int nPrevCost = cur_arrival [ j ] . cost ;
const int nRepCodingChoiceCost = nPrevCost /* the actual cost of the literals themselves accumulates up the chain */ + nMatchLenCost ;
2022-04-02 06:49:26 +00:00
if ( nRepCodingChoiceCost < pDestSlots [ nArrivalsPerPosition - 1 ] . cost | |
2022-04-19 07:18:09 +00:00
( nRepCodingChoiceCost = = pDestSlots [ nArrivalsPerPosition - 1 ] . cost & & nScore < pDestSlots [ nArrivalsPerPosition - 1 ] . score ) ) {
2022-04-02 06:49:26 +00:00
int exists = 0 , n ;
2019-10-08 18:26:21 +00:00
2022-04-02 06:49:26 +00:00
for ( n = 0 ;
pDestSlots [ n ] . cost < nRepCodingChoiceCost ;
2021-10-10 05:52:03 +00:00
n + + ) {
if ( pDestSlots [ n ] . rep_offset = = nRepOffset ) {
exists = 1 ;
2022-04-02 06:49:26 +00:00
if ( ! nReduce )
nRepSlotHandledMask [ nMaskOffset ] | = 1 < < ( k & 7 ) ;
2021-10-10 05:52:03 +00:00
break ;
2019-12-09 08:54:56 +00:00
}
2021-10-10 05:52:03 +00:00
}
2019-12-09 08:54:56 +00:00
2021-10-10 05:52:03 +00:00
if ( ! exists ) {
2022-04-02 06:49:26 +00:00
for ( ;
2022-04-19 07:18:09 +00:00
n < nArrivalsPerPosition & & pDestSlots [ n ] . cost = = nRepCodingChoiceCost & & nScore > = pDestSlots [ n ] . score ;
2022-04-02 06:49:26 +00:00
n + + ) {
if ( pDestSlots [ n ] . rep_offset = = nRepOffset ) {
exists = 1 ;
break ;
2020-07-14 10:36:56 +00:00
}
2022-04-02 06:49:26 +00:00
}
2020-07-24 15:14:01 +00:00
2022-04-02 06:49:26 +00:00
if ( ! exists ) {
if ( n < nArrivalsPerPosition ) {
2021-10-10 05:52:03 +00:00
int z ;
2022-04-02 06:49:26 +00:00
for ( z = n ;
z < nArrivalsPerPosition - 1 & & pDestSlots [ z ] . cost = = nRepCodingChoiceCost ;
z + + ) {
if ( pDestSlots [ z ] . rep_offset = = nRepOffset ) {
exists = 1 ;
2021-10-10 05:52:03 +00:00
break ;
2022-04-02 06:49:26 +00:00
}
2021-10-10 05:52:03 +00:00
}
2022-04-02 06:49:26 +00:00
if ( ! exists ) {
for ( ; z < nArrivalsPerPosition - 1 & & pDestSlots [ z ] . from_slot ; z + + ) {
if ( pDestSlots [ z ] . rep_offset = = nRepOffset )
break ;
}
memmove ( & pDestSlots [ n + 1 ] ,
& pDestSlots [ n ] ,
sizeof ( lzsa_arrival ) * ( z - n ) ) ;
lzsa_arrival * pDestArrival = & pDestSlots [ n ] ;
pDestArrival - > cost = nRepCodingChoiceCost ;
pDestArrival - > rep_offset = nRepOffset ;
pDestArrival - > from_slot = j + 1 ;
2022-05-04 09:32:21 +00:00
pDestArrival - > from_pos = i - nStartOffset ;
2022-04-02 06:49:26 +00:00
pDestArrival - > rep_len = k ;
pDestArrival - > match_len = k ;
pDestArrival - > num_literals = 0 ;
2022-05-04 09:32:21 +00:00
pDestArrival - > rep_pos = i ;
2022-04-02 06:49:26 +00:00
pDestArrival - > score = nScore + nDisableScore ;
nRepLenHandledMask [ k > > 3 ] & = ~ ( ( 1 ^ nReduce ) < < ( k & 7 ) ) ;
}
2021-10-10 05:52:03 +00:00
}
2020-07-14 10:36:56 +00:00
}
2019-10-08 18:26:21 +00:00
}
2019-10-08 14:23:33 +00:00
}
2022-04-02 06:49:26 +00:00
else {
break ;
}
2021-10-10 05:52:03 +00:00
}
2020-07-06 10:47:56 +00:00
}
2019-09-20 10:24:27 +00:00
}
}
2020-07-29 11:01:24 +00:00
if ( k < nMaxRepInsertedLen )
nMinOverallRepLen = k ;
2019-08-26 22:51:34 +00:00
}
}
2019-12-15 17:04:16 +00:00
2023-01-30 12:21:43 +00:00
if ( nMatchLen > = LCP_MAX & & ( ( m + 1 ) > = NMATCHES_PER_INDEX_V2 | | ( match [ m + 1 ] . length & 0x7fff ) < LCP_MAX ) )
2019-12-15 17:04:16 +00:00
break ;
2019-08-26 22:51:34 +00:00
}
}
2022-01-02 06:15:23 +00:00
if ( ! nInsertForwardReps ) {
2022-10-15 10:10:41 +00:00
const lzsa_arrival * end_arrival = & arrival [ i < < ARRIVALS_PER_POSITION_SHIFT_V2 ] ;
2022-04-19 07:18:09 +00:00
lzsa_match * pBestMatch = pCompressor - > best_match - nStartOffset ;
2019-08-26 22:51:34 +00:00
2022-10-20 15:16:34 +00:00
while ( end_arrival - > from_slot > 0 & & ( end_arrival - > from_pos + nStartOffset ) < nEndOffset ) {
2022-05-04 09:32:21 +00:00
pBestMatch [ end_arrival - > from_pos + nStartOffset ] . length = end_arrival - > match_len ;
pBestMatch [ end_arrival - > from_pos + nStartOffset ] . offset = ( end_arrival - > match_len ) ? end_arrival - > rep_offset : 0 ;
end_arrival = & arrival [ ( ( end_arrival - > from_pos + nStartOffset ) < < ARRIVALS_PER_POSITION_SHIFT_V2 ) + ( end_arrival - > from_slot - 1 ) ] ;
2022-01-02 06:15:23 +00:00
}
2019-08-26 22:51:34 +00:00
}
}
2019-06-07 21:15:40 +00:00
/**
* Attempt to minimize the number of commands issued in the compressed data block , in order to speed up decompression without
* impacting the compression ratio
*
* @ param pCompressor compression context
2019-09-20 10:24:27 +00:00
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
2019-06-07 21:15:40 +00:00
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
*
* @ return non - zero if the number of tokens was reduced , 0 if it wasn ' t
*/
2022-04-19 07:18:09 +00:00
static int lzsa_optimize_command_count_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int nStartOffset , const int nEndOffset ) {
lzsa_match * pBestMatch = pCompressor - > best_match - nStartOffset ;
2019-06-07 21:15:40 +00:00
int i ;
int nNumLiterals = 0 ;
2019-10-09 11:16:29 +00:00
int nPrevRepMatchOffset = 0 ;
2019-06-21 15:28:17 +00:00
int nRepMatchOffset = 0 ;
2019-10-09 11:16:29 +00:00
int nRepMatchLen = 0 ;
int nRepIndex = 0 ;
2019-09-17 06:10:52 +00:00
int nDidReduce = 0 ;
2019-06-07 21:15:40 +00:00
for ( i = nStartOffset ; i < nEndOffset ; ) {
2019-07-01 07:25:19 +00:00
lzsa_match * pMatch = pBestMatch + i ;
2019-06-07 21:15:40 +00:00
2019-11-18 11:10:23 +00:00
if ( pMatch - > length = = 0 & &
2020-07-13 17:34:07 +00:00
( i + 1 ) < nEndOffset & &
2019-11-18 11:10:23 +00:00
pBestMatch [ i + 1 ] . length > = MIN_MATCH_SIZE_V2 & &
2019-11-19 18:48:39 +00:00
pBestMatch [ i + 1 ] . length < MAX_VARLEN & &
2019-11-18 11:10:23 +00:00
pBestMatch [ i + 1 ] . offset & &
i > = pBestMatch [ i + 1 ] . offset & &
2020-07-13 17:34:07 +00:00
( i + pBestMatch [ i + 1 ] . length + 1 ) < = nEndOffset & &
2019-11-18 11:10:23 +00:00
! memcmp ( pInWindow + i - ( pBestMatch [ i + 1 ] . offset ) , pInWindow + i , pBestMatch [ i + 1 ] . length + 1 ) ) {
2022-04-02 06:49:26 +00:00
const int nCurLenSize = lzsa_get_match_varlen_size_v2 ( pBestMatch [ i + 1 ] . length - MIN_MATCH_SIZE_V2 ) ;
const int nReducedLenSize = lzsa_get_match_varlen_size_v2 ( pBestMatch [ i + 1 ] . length + 1 - MIN_MATCH_SIZE_V2 ) ;
2019-11-18 11:10:23 +00:00
if ( ( nReducedLenSize - nCurLenSize ) < = 8 ) {
/* Merge */
pBestMatch [ i ] . length = pBestMatch [ i + 1 ] . length + 1 ;
pBestMatch [ i ] . offset = pBestMatch [ i + 1 ] . offset ;
pBestMatch [ i + 1 ] . length = 0 ;
pBestMatch [ i + 1 ] . offset = 0 ;
nDidReduce = 1 ;
continue ;
}
}
2019-06-07 21:15:40 +00:00
if ( pMatch - > length > = MIN_MATCH_SIZE_V2 ) {
2019-09-20 10:24:27 +00:00
if ( ( i + pMatch - > length ) < nEndOffset /* Don't consider the last match in the block, we can only reduce a match inbetween other tokens */ ) {
2019-09-17 06:10:52 +00:00
int nNextIndex = i + pMatch - > length ;
int nNextLiterals = 0 ;
while ( nNextIndex < nEndOffset & & pBestMatch [ nNextIndex ] . length < MIN_MATCH_SIZE_V2 ) {
nNextLiterals + + ;
nNextIndex + + ;
}
2019-06-07 21:15:40 +00:00
2022-10-19 08:39:40 +00:00
if ( nNextIndex < nEndOffset ) {
2019-09-20 10:24:27 +00:00
/* This command is a match, is followed by 'nNextLiterals' literals and then by another match */
2022-10-16 16:39:24 +00:00
if ( nRepMatchOffset & & pMatch - > offset ! = nRepMatchOffset & & ( pBestMatch [ nNextIndex ] . offset ! = pMatch - > offset | |
2019-10-08 07:39:18 +00:00
( ( pMatch - > offset < = 32 ) ? 4 : ( ( pMatch - > offset < = 512 ) ? 8 : ( ( pMatch - > offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ) >
2019-09-20 10:24:27 +00:00
( ( pBestMatch [ nNextIndex ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ nNextIndex ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ nNextIndex ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ) ) ) {
/* Check if we can change the current match's offset to be the same as the previous match's offset, and get an extra repmatch. This will occur when
* matching large regions of identical bytes for instance , where there are too many offsets to be considered by the parser , and when not compressing to favor the
* ratio ( the forward arrivals parser already has this covered ) . */
2021-11-26 10:24:44 +00:00
if ( i > = nRepMatchOffset & &
2022-10-16 16:39:24 +00:00
! memcmp ( pInWindow + i - nRepMatchOffset , pInWindow + i , pMatch - > length ) ) {
2019-09-20 10:24:27 +00:00
pMatch - > offset = nRepMatchOffset ;
2019-10-03 14:58:34 +00:00
nDidReduce = 1 ;
}
2019-09-20 10:24:27 +00:00
}
2023-01-30 12:21:43 +00:00
if ( pBestMatch [ nNextIndex ] . offset & & pMatch - > offset ! = pBestMatch [ nNextIndex ] . offset ) {
2019-09-23 18:24:50 +00:00
/* Otherwise, try to gain a match forward as well */
2022-10-16 16:39:24 +00:00
if ( i > = pBestMatch [ nNextIndex ] . offset & & ( i + pMatch - > length ) < = nEndOffset ) {
2019-09-23 18:24:50 +00:00
int nMaxLen = 0 ;
2022-10-15 10:10:41 +00:00
const unsigned char * pInWindowAtPos = pInWindow + i ;
2023-01-30 12:21:43 +00:00
while ( ( nMaxLen + 8 ) < pMatch - > length & & ! memcmp ( pInWindowAtPos + nMaxLen - pBestMatch [ nNextIndex ] . offset , pInWindowAtPos + nMaxLen , 8 ) )
nMaxLen + = 8 ;
2022-10-15 10:10:41 +00:00
while ( ( nMaxLen + 4 ) < pMatch - > length & & ! memcmp ( pInWindowAtPos + nMaxLen - pBestMatch [ nNextIndex ] . offset , pInWindowAtPos + nMaxLen , 4 ) )
nMaxLen + = 4 ;
2022-10-16 16:39:24 +00:00
while ( nMaxLen < pMatch - > length & & pInWindowAtPos [ nMaxLen - pBestMatch [ nNextIndex ] . offset ] = = pInWindowAtPos [ nMaxLen ] )
2019-09-23 18:24:50 +00:00
nMaxLen + + ;
if ( nMaxLen > = pMatch - > length ) {
/* Replace */
pMatch - > offset = pBestMatch [ nNextIndex ] . offset ;
2019-10-03 14:58:34 +00:00
nDidReduce = 1 ;
2019-09-23 18:24:50 +00:00
}
else if ( nMaxLen > = 2 & & pMatch - > offset ! = nRepMatchOffset ) {
int nPartialSizeBefore , nPartialSizeAfter ;
nPartialSizeBefore = lzsa_get_match_varlen_size_v2 ( pMatch - > length - MIN_MATCH_SIZE_V2 ) ;
nPartialSizeBefore + = ( pMatch - > offset < = 32 ) ? 4 : ( ( pMatch - > offset < = 512 ) ? 8 : ( ( pMatch - > offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
nPartialSizeBefore + = lzsa_get_literals_varlen_size_v2 ( nNextLiterals ) ;
2023-01-30 12:21:43 +00:00
nPartialSizeBefore + = ( pBestMatch [ nNextIndex ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ nNextIndex ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ nNextIndex ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
2019-09-23 18:24:50 +00:00
nPartialSizeAfter = lzsa_get_match_varlen_size_v2 ( nMaxLen - MIN_MATCH_SIZE_V2 ) ;
nPartialSizeAfter + = lzsa_get_literals_varlen_size_v2 ( nNextLiterals + ( pMatch - > length - nMaxLen ) ) + ( ( pMatch - > length - nMaxLen ) < < 3 ) ;
2023-01-30 12:21:43 +00:00
if ( nRepMatchOffset ! = pBestMatch [ nNextIndex ] . offset )
nPartialSizeAfter + = ( pBestMatch [ nNextIndex ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ nNextIndex ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ nNextIndex ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
2019-09-23 18:24:50 +00:00
if ( nPartialSizeAfter < nPartialSizeBefore ) {
2022-10-15 10:10:41 +00:00
const int nMatchLen = pMatch - > length ;
2019-09-23 18:24:50 +00:00
int j ;
/* We gain a repmatch that is shorter than the original match as this is the best we can do, so it is followed by extra literals, but
* we have calculated that this is shorter */
2022-10-15 10:10:41 +00:00
pMatch - > length = nMaxLen ;
2019-09-23 18:24:50 +00:00
pMatch - > offset = pBestMatch [ nNextIndex ] . offset ;
2022-10-15 10:10:41 +00:00
for ( j = nMaxLen ; j < nMatchLen ; j + + ) {
2019-09-23 18:24:50 +00:00
pBestMatch [ i + j ] . length = 0 ;
}
2019-10-03 14:58:34 +00:00
nDidReduce = 1 ;
2019-09-23 18:24:50 +00:00
}
}
}
}
2019-09-22 18:41:09 +00:00
if ( pMatch - > length < 9 /* Don't waste time considering large matches, they will always win over literals */ ) {
/* Calculate this command's current cost (excluding 'nNumLiterals' bytes) */
2019-06-07 21:15:40 +00:00
2019-09-22 18:41:09 +00:00
int nCurCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNumLiterals ) + lzsa_get_match_varlen_size_v2 ( pMatch - > length - MIN_MATCH_SIZE_V2 ) ;
if ( pMatch - > offset ! = nRepMatchOffset )
nCurCommandSize + = ( pMatch - > offset < = 32 ) ? 4 : ( ( pMatch - > offset < = 512 ) ? 8 : ( ( pMatch - > offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
2019-06-07 21:15:40 +00:00
2019-09-22 18:41:09 +00:00
/* Calculate the next command's current cost */
2020-07-29 13:23:22 +00:00
int nNextCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNextLiterals ) + /* (nNextLiterals << 3) + */ lzsa_get_match_varlen_size_v2 ( pBestMatch [ nNextIndex ] . length - MIN_MATCH_SIZE_V2 ) ;
2019-09-22 18:41:09 +00:00
if ( pBestMatch [ nNextIndex ] . offset ! = pMatch - > offset )
nNextCommandSize + = ( pBestMatch [ nNextIndex ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ nNextIndex ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ nNextIndex ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
2019-06-07 21:15:40 +00:00
2022-04-02 06:49:26 +00:00
const int nOriginalCombinedCommandSize = nCurCommandSize + nNextCommandSize ;
2019-09-17 06:10:52 +00:00
2019-09-22 18:41:09 +00:00
/* Calculate the cost of replacing this match command by literals + the next command with the cost of encoding these literals (excluding 'nNumLiterals' bytes) */
2020-07-29 13:23:22 +00:00
int nReducedCommandSize = ( pMatch - > length < < 3 ) + 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNumLiterals + pMatch - > length + nNextLiterals ) + /* (nNextLiterals << 3) + */ lzsa_get_match_varlen_size_v2 ( pBestMatch [ nNextIndex ] . length - MIN_MATCH_SIZE_V2 ) ;
2019-09-22 18:41:09 +00:00
if ( pBestMatch [ nNextIndex ] . offset ! = nRepMatchOffset )
nReducedCommandSize + = ( pBestMatch [ nNextIndex ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ nNextIndex ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ nNextIndex ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
2019-09-17 06:10:52 +00:00
2019-10-09 11:16:29 +00:00
int nReplaceRepOffset = 0 ;
2021-11-26 10:24:44 +00:00
if ( nRepMatchOffset & & nRepMatchOffset ! = nPrevRepMatchOffset & & nRepMatchLen > = MIN_MATCH_SIZE_V2 & & nRepMatchOffset ! = pBestMatch [ nNextIndex ] . offset & & nRepIndex > = pBestMatch [ nNextIndex ] . offset & &
2020-07-13 17:34:07 +00:00
( nRepIndex - pBestMatch [ nNextIndex ] . offset + nRepMatchLen ) < = nEndOffset & &
2019-10-09 11:16:29 +00:00
! memcmp ( pInWindow + nRepIndex - nRepMatchOffset , pInWindow + nRepIndex - pBestMatch [ nNextIndex ] . offset , nRepMatchLen ) ) {
/* Replacing this match command by literals would let us create a repmatch */
nReplaceRepOffset = 1 ;
nReducedCommandSize - = ( nRepMatchOffset < = 32 ) ? 4 : ( ( nRepMatchOffset < = 512 ) ? 8 : ( ( nRepMatchOffset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
}
2019-09-22 18:41:09 +00:00
if ( nOriginalCombinedCommandSize > = nReducedCommandSize ) {
/* Reduce */
2022-04-02 06:49:26 +00:00
const int nMatchLen = pMatch - > length ;
2019-09-22 18:41:09 +00:00
int j ;
for ( j = 0 ; j < nMatchLen ; j + + ) {
pBestMatch [ i + j ] . length = 0 ;
}
2019-09-17 06:10:52 +00:00
2019-09-22 18:41:09 +00:00
nDidReduce = 1 ;
2019-10-09 11:16:29 +00:00
if ( nReplaceRepOffset ) {
pBestMatch [ nRepIndex ] . offset = pBestMatch [ nNextIndex ] . offset ;
nRepMatchOffset = pBestMatch [ nNextIndex ] . offset ;
}
2019-09-22 18:41:09 +00:00
continue ;
}
2019-06-21 15:28:17 +00:00
}
2019-06-07 21:15:40 +00:00
}
}
2022-10-15 10:10:41 +00:00
if ( ( i + pMatch - > length ) < nEndOffset & & pMatch - > offset & & pMatch - > length > = MIN_MATCH_SIZE_V2 & &
pBestMatch [ i + pMatch - > length ] . offset & &
2019-10-24 11:05:32 +00:00
pBestMatch [ i + pMatch - > length ] . length > = MIN_MATCH_SIZE_V2 & &
( pMatch - > length + pBestMatch [ i + pMatch - > length ] . length ) < = MAX_VARLEN & &
2021-11-28 09:01:11 +00:00
( i + pMatch - > length ) > = pMatch - > offset & &
( i + pMatch - > length ) > = pBestMatch [ i + pMatch - > length ] . offset & &
2019-11-26 19:48:13 +00:00
( i + pMatch - > length + pBestMatch [ i + pMatch - > length ] . length ) < = nEndOffset & &
2019-10-24 11:05:32 +00:00
! memcmp ( pInWindow + i - pMatch - > offset + pMatch - > length ,
pInWindow + i + pMatch - > length - pBestMatch [ i + pMatch - > length ] . offset ,
pBestMatch [ i + pMatch - > length ] . length ) ) {
2019-06-07 21:15:40 +00:00
2019-11-11 17:41:08 +00:00
int nNextIndex = i + pMatch - > length ;
while ( nNextIndex < nEndOffset & & pBestMatch [ nNextIndex ] . length < MIN_MATCH_SIZE_V2 ) {
nNextIndex + + ;
}
int nCurPartialSize = lzsa_get_match_varlen_size_v2 ( pMatch - > length - MIN_MATCH_SIZE_V2 ) ;
2019-06-07 21:15:40 +00:00
2020-07-29 13:23:22 +00:00
nCurPartialSize + = 8 /* token */ + /* lzsa_get_literals_varlen_size_v2(0) + */ lzsa_get_match_varlen_size_v2 ( pBestMatch [ i + pMatch - > length ] . length - MIN_MATCH_SIZE_V2 ) ;
2019-11-11 17:41:08 +00:00
if ( pBestMatch [ i + pMatch - > length ] . offset ! = pMatch - > offset )
nCurPartialSize + = ( pBestMatch [ i + pMatch - > length ] . offset < = 32 ) ? 4 : ( ( pBestMatch [ i + pMatch - > length ] . offset < = 512 ) ? 8 : ( ( pBestMatch [ i + pMatch - > length ] . offset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
int nReducedPartialSize = lzsa_get_match_varlen_size_v2 ( pMatch - > length + pBestMatch [ i + pMatch - > length ] . length - MIN_MATCH_SIZE_V2 ) ;
2022-04-02 06:49:26 +00:00
if ( nNextIndex < nEndOffset ) {
const int nNextOffset = pBestMatch [ nNextIndex ] . offset ;
if ( nNextOffset ! = pBestMatch [ i + pMatch - > length ] . offset )
nCurPartialSize + = ( nNextOffset < = 32 ) ? 4 : ( ( nNextOffset < = 512 ) ? 8 : ( ( nNextOffset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
if ( nNextOffset ! = pMatch - > offset )
nReducedPartialSize + = ( nNextOffset < = 32 ) ? 4 : ( ( nNextOffset < = 512 ) ? 8 : ( ( nNextOffset < = ( 8192 + 512 ) ) ? 12 : 16 ) ) ;
}
2019-11-11 17:41:08 +00:00
if ( nCurPartialSize > = nReducedPartialSize ) {
2022-04-02 06:49:26 +00:00
const int nMatchLen = pMatch - > length ;
2019-11-11 17:41:08 +00:00
/* Join */
pMatch - > length + = pBestMatch [ i + nMatchLen ] . length ;
2022-04-02 06:49:26 +00:00
pBestMatch [ i + nMatchLen ] . length = 0 ;
2019-11-11 17:41:08 +00:00
pBestMatch [ i + nMatchLen ] . offset = 0 ;
nDidReduce = 1 ;
continue ;
}
2019-06-07 21:15:40 +00:00
}
2019-10-09 11:16:29 +00:00
nPrevRepMatchOffset = nRepMatchOffset ;
nRepMatchLen = pMatch - > length ;
2022-04-28 06:02:27 +00:00
nRepMatchOffset = pMatch - > offset ;
2019-10-09 11:16:29 +00:00
nRepIndex = i ;
2019-06-07 21:15:40 +00:00
2019-09-17 06:10:52 +00:00
i + = pMatch - > length ;
nNumLiterals = 0 ;
2019-06-07 21:15:40 +00:00
}
else {
nNumLiterals + + ;
i + + ;
}
}
return nDidReduce ;
}
/**
* Emit block of compressed data
*
* @ param pCompressor compression context
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
* @ param pOutData pointer to output buffer
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
*
* @ return size of compressed data in output buffer , or - 1 if the data is uncompressible
*/
2022-10-15 10:10:41 +00:00
static int lzsa_write_block_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int nStartOffset , const int nEndOffset , unsigned char * pOutData , const int nMaxOutDataSize ) {
const lzsa_match * pBestMatch = pCompressor - > best_match - nStartOffset ;
2019-06-07 21:15:40 +00:00
int i ;
int nNumLiterals = 0 ;
int nInFirstLiteralOffset = 0 ;
int nOutOffset = 0 ;
2020-07-24 15:14:01 +00:00
int nCurNibbleOffset = - 1 ;
2019-06-07 21:15:40 +00:00
int nRepMatchOffset = 0 ;
for ( i = nStartOffset ; i < nEndOffset ; ) {
2019-07-01 07:25:19 +00:00
const lzsa_match * pMatch = pBestMatch + i ;
2019-06-07 21:15:40 +00:00
if ( pMatch - > length > = MIN_MATCH_SIZE_V2 ) {
2022-01-02 06:15:23 +00:00
const int nMatchLen = pMatch - > length ;
2022-04-28 06:02:27 +00:00
const int nMatchOffset = pMatch - > offset ;
2022-01-02 06:15:23 +00:00
const int nEncodedMatchLen = nMatchLen - MIN_MATCH_SIZE_V2 ;
const int nTokenLiteralsLen = ( nNumLiterals > = LITERALS_RUN_LEN_V2 ) ? LITERALS_RUN_LEN_V2 : nNumLiterals ;
const int nTokenMatchLen = ( nEncodedMatchLen > = MATCH_RUN_LEN_V2 ) ? MATCH_RUN_LEN_V2 : nEncodedMatchLen ;
2019-06-07 21:15:40 +00:00
int nTokenOffsetMode ;
int nOffsetSize ;
if ( nMatchOffset = = nRepMatchOffset ) {
nTokenOffsetMode = 0xe0 ;
nOffsetSize = 0 ;
}
else {
if ( nMatchOffset < = 32 ) {
2022-10-15 10:10:41 +00:00
nTokenOffsetMode = /* 0x00 | */ ( ( ( ( - nMatchOffset ) & 0x01 ) < < 5 ) ^ 0x20 ) ;
2019-06-07 21:15:40 +00:00
nOffsetSize = 4 ;
}
else if ( nMatchOffset < = 512 ) {
2019-06-08 11:35:03 +00:00
nTokenOffsetMode = 0x40 | ( ( ( ( - nMatchOffset ) & 0x100 ) > > 3 ) ^ 0x20 ) ;
2019-06-07 21:15:40 +00:00
nOffsetSize = 8 ;
}
else if ( nMatchOffset < = ( 8192 + 512 ) ) {
2019-06-08 11:35:03 +00:00
nTokenOffsetMode = 0x80 | ( ( ( ( - ( nMatchOffset - 512 ) ) & 0x0100 ) > > 3 ) ^ 0x20 ) ;
2019-06-07 21:15:40 +00:00
nOffsetSize = 12 ;
}
else {
nTokenOffsetMode = 0xc0 ;
nOffsetSize = 16 ;
}
}
2022-01-02 06:15:23 +00:00
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNumLiterals ) + ( nNumLiterals < < 3 ) + nOffsetSize /* match offset */ + lzsa_get_match_varlen_size_v2 ( nEncodedMatchLen ) ;
2019-06-07 21:15:40 +00:00
if ( ( nOutOffset + ( ( nCommandSize + 7 ) > > 3 ) ) > nMaxOutDataSize )
return - 1 ;
if ( nMatchOffset < MIN_OFFSET | | nMatchOffset > MAX_OFFSET )
return - 1 ;
pOutData [ nOutOffset + + ] = nTokenOffsetMode | ( nTokenLiteralsLen < < 3 ) | nTokenMatchLen ;
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_literals_varlen_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , nNumLiterals ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
2019-10-09 16:20:22 +00:00
if ( nNumLiterals < pCompressor - > stats . min_literals | | pCompressor - > stats . min_literals = = - 1 )
pCompressor - > stats . min_literals = nNumLiterals ;
if ( nNumLiterals > pCompressor - > stats . max_literals )
pCompressor - > stats . max_literals = nNumLiterals ;
pCompressor - > stats . total_literals + = nNumLiterals ;
pCompressor - > stats . literals_divisor + + ;
2019-06-07 21:15:40 +00:00
if ( nNumLiterals ! = 0 ) {
memcpy ( pOutData + nOutOffset , pInWindow + nInFirstLiteralOffset , nNumLiterals ) ;
nOutOffset + = nNumLiterals ;
nNumLiterals = 0 ;
}
if ( nTokenOffsetMode = = 0x00 | | nTokenOffsetMode = = 0x20 ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , ( ( - nMatchOffset ) & 0x1e ) > > 1 ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
}
else if ( nTokenOffsetMode = = 0x40 | | nTokenOffsetMode = = 0x60 ) {
pOutData [ nOutOffset + + ] = ( - nMatchOffset ) & 0xff ;
}
else if ( nTokenOffsetMode = = 0x80 | | nTokenOffsetMode = = 0xa0 ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , ( ( - ( nMatchOffset - 512 ) ) > > 9 ) & 0x0f ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
pOutData [ nOutOffset + + ] = ( - ( nMatchOffset - 512 ) ) & 0xff ;
}
else if ( nTokenOffsetMode = = 0xc0 ) {
pOutData [ nOutOffset + + ] = ( - nMatchOffset ) > > 8 ;
pOutData [ nOutOffset + + ] = ( - nMatchOffset ) & 0xff ;
}
2019-10-09 16:20:22 +00:00
if ( nMatchOffset = = nRepMatchOffset )
pCompressor - > stats . num_rep_offsets + + ;
2019-06-07 21:15:40 +00:00
nRepMatchOffset = nMatchOffset ;
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_match_varlen_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , nEncodedMatchLen ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
2019-10-09 16:20:22 +00:00
if ( nMatchOffset < pCompressor - > stats . min_offset | | pCompressor - > stats . min_offset = = - 1 )
pCompressor - > stats . min_offset = nMatchOffset ;
if ( nMatchOffset > pCompressor - > stats . max_offset )
pCompressor - > stats . max_offset = nMatchOffset ;
pCompressor - > stats . total_offsets + = nMatchOffset ;
if ( nMatchLen < pCompressor - > stats . min_match_len | | pCompressor - > stats . min_match_len = = - 1 )
pCompressor - > stats . min_match_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_match_len )
pCompressor - > stats . max_match_len = nMatchLen ;
pCompressor - > stats . total_match_lens + = nMatchLen ;
pCompressor - > stats . match_divisor + + ;
if ( nMatchOffset = = 1 ) {
if ( nMatchLen < pCompressor - > stats . min_rle1_len | | pCompressor - > stats . min_rle1_len = = - 1 )
pCompressor - > stats . min_rle1_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_rle1_len )
pCompressor - > stats . max_rle1_len = nMatchLen ;
pCompressor - > stats . total_rle1_lens + = nMatchLen ;
pCompressor - > stats . rle1_divisor + + ;
}
else if ( nMatchOffset = = 2 ) {
if ( nMatchLen < pCompressor - > stats . min_rle2_len | | pCompressor - > stats . min_rle2_len = = - 1 )
pCompressor - > stats . min_rle2_len = nMatchLen ;
if ( nMatchLen > pCompressor - > stats . max_rle2_len )
pCompressor - > stats . max_rle2_len = nMatchLen ;
pCompressor - > stats . total_rle2_lens + = nMatchLen ;
pCompressor - > stats . rle2_divisor + + ;
}
2019-06-07 21:15:40 +00:00
i + = nMatchLen ;
2019-07-24 13:43:44 +00:00
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
2022-04-19 07:18:09 +00:00
const int nCurSafeDist = ( i - nStartOffset ) - nOutOffset ;
2019-07-24 13:43:44 +00:00
if ( nCurSafeDist > = 0 & & pCompressor - > safe_dist < nCurSafeDist )
pCompressor - > safe_dist = nCurSafeDist ;
}
2019-06-07 21:15:40 +00:00
pCompressor - > num_commands + + ;
}
else {
if ( nNumLiterals = = 0 )
nInFirstLiteralOffset = i ;
nNumLiterals + + ;
i + + ;
}
}
{
2022-01-02 06:15:23 +00:00
const int nTokenLiteralsLen = ( nNumLiterals > = LITERALS_RUN_LEN_V2 ) ? LITERALS_RUN_LEN_V2 : nNumLiterals ;
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNumLiterals ) + ( nNumLiterals < < 3 ) ;
2019-06-07 21:15:40 +00:00
if ( ( nOutOffset + ( ( nCommandSize + 7 ) > > 3 ) ) > nMaxOutDataSize )
return - 1 ;
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK )
2020-06-21 22:13:14 +00:00
pOutData [ nOutOffset + + ] = ( nTokenLiteralsLen < < 3 ) | 0xe7 ;
2019-06-07 21:15:40 +00:00
else
2022-10-15 10:10:41 +00:00
pOutData [ nOutOffset + + ] = ( nTokenLiteralsLen < < 3 ) /* | 0x00 */ ;
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_literals_varlen_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , nNumLiterals ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
2019-10-09 16:20:22 +00:00
if ( nNumLiterals < pCompressor - > stats . min_literals | | pCompressor - > stats . min_literals = = - 1 )
pCompressor - > stats . min_literals = nNumLiterals ;
if ( nNumLiterals > pCompressor - > stats . max_literals )
pCompressor - > stats . max_literals = nNumLiterals ;
pCompressor - > stats . total_literals + = nNumLiterals ;
pCompressor - > stats . literals_divisor + + ;
2019-06-07 21:15:40 +00:00
if ( nNumLiterals ! = 0 ) {
memcpy ( pOutData + nOutOffset , pInWindow + nInFirstLiteralOffset , nNumLiterals ) ;
nOutOffset + = nNumLiterals ;
}
2019-07-24 13:43:44 +00:00
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
2022-04-19 07:18:09 +00:00
const int nCurSafeDist = ( i - nStartOffset ) - nOutOffset ;
2019-07-24 13:43:44 +00:00
if ( nCurSafeDist > = 0 & & pCompressor - > safe_dist < nCurSafeDist )
pCompressor - > safe_dist = nCurSafeDist ;
}
2019-06-07 21:15:40 +00:00
pCompressor - > num_commands + + ;
}
if ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) {
/* Emit EOD marker for raw block */
if ( nOutOffset > = nMaxOutDataSize )
return - 1 ;
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , 15 ) ; /* Extended match length nibble */
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 ) return - 1 ;
if ( ( nOutOffset + 1 ) > nMaxOutDataSize )
return - 1 ;
pOutData [ nOutOffset + + ] = 232 ; /* EOD match length byte */
}
if ( nCurNibbleOffset ! = - 1 ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , 0 ) ;
2019-06-07 21:15:40 +00:00
if ( nOutOffset < 0 | | nCurNibbleOffset ! = - 1 )
return - 1 ;
}
return nOutOffset ;
}
2019-07-01 07:25:19 +00:00
/**
* Emit raw block of uncompressible data
*
* @ param pCompressor compression context
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
* @ param nStartOffset current offset in input window ( typically the number of previously compressed bytes )
* @ param nEndOffset offset to end finding matches at ( typically the size of the total input window in bytes
* @ param pOutData pointer to output buffer
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
*
* @ return size of compressed data in output buffer , or - 1 if the data is uncompressible
*/
static int lzsa_write_raw_uncompressed_block_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int nStartOffset , const int nEndOffset , unsigned char * pOutData , const int nMaxOutDataSize ) {
2020-07-24 15:14:01 +00:00
int nCurNibbleOffset = - 1 ;
2022-04-19 07:18:09 +00:00
const int nNumLiterals = nEndOffset - nStartOffset ;
2022-04-02 06:49:26 +00:00
const int nTokenLiteralsLen = ( nNumLiterals > = LITERALS_RUN_LEN_V2 ) ? LITERALS_RUN_LEN_V2 : nNumLiterals ;
2019-07-01 07:25:19 +00:00
int nOutOffset = 0 ;
2022-01-02 06:15:23 +00:00
const int nCommandSize = 8 /* token */ + lzsa_get_literals_varlen_size_v2 ( nNumLiterals ) + ( nNumLiterals < < 3 ) + 4 + 8 ;
2019-07-01 07:25:19 +00:00
if ( ( nOutOffset + ( ( nCommandSize + 7 ) > > 3 ) ) > nMaxOutDataSize )
return - 1 ;
pCompressor - > num_commands = 0 ;
2020-06-21 22:13:14 +00:00
pOutData [ nOutOffset + + ] = ( nTokenLiteralsLen < < 3 ) | 0xe7 ;
2019-07-01 07:25:19 +00:00
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_literals_varlen_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , nNumLiterals ) ;
2019-07-01 07:25:19 +00:00
if ( nOutOffset < 0 ) return - 1 ;
if ( nNumLiterals ! = 0 ) {
memcpy ( pOutData + nOutOffset , pInWindow + nStartOffset , nNumLiterals ) ;
nOutOffset + = nNumLiterals ;
}
/* Emit EOD marker for raw block */
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , 15 ) ; /* Extended match length nibble */
2019-07-01 07:25:19 +00:00
if ( nOutOffset < 0 ) return - 1 ;
if ( ( nOutOffset + 1 ) > nMaxOutDataSize )
return - 1 ;
pOutData [ nOutOffset + + ] = 232 ; /* EOD match length byte */
pCompressor - > num_commands + + ;
if ( nCurNibbleOffset ! = - 1 ) {
2020-07-24 15:14:01 +00:00
nOutOffset = lzsa_write_nibble_v2 ( pOutData , nOutOffset , nMaxOutDataSize , & nCurNibbleOffset , 0 ) ;
2019-07-01 07:25:19 +00:00
if ( nOutOffset < 0 | | nCurNibbleOffset ! = - 1 )
return - 1 ;
}
return nOutOffset ;
}
2019-06-07 21:15:40 +00:00
/**
* Select the most optimal matches , reduce the token count if possible , and then emit a block of compressed LZSA2 data
*
* @ param pCompressor compression context
* @ param pInWindow pointer to input data window ( previously compressed bytes + bytes to compress )
2019-07-23 21:28:52 +00:00
* @ param nPreviousBlockSize number of previously compressed bytes ( or 0 for none )
* @ param nInDataSize number of input bytes to compress
2019-06-07 21:15:40 +00:00
* @ param pOutData pointer to output buffer
* @ param nMaxOutDataSize maximum size of output buffer , in bytes
*
* @ return size of compressed data in output buffer , or - 1 if the data is uncompressible
*/
int lzsa_optimize_and_write_block_v2 ( lzsa_compressor * pCompressor , const unsigned char * pInWindow , const int nPreviousBlockSize , const int nInDataSize , unsigned char * pOutData , const int nMaxOutDataSize ) {
2022-01-02 06:15:23 +00:00
const int nEndOffset = nPreviousBlockSize + nInDataSize ;
const int nArrivalsPerPosition = ( nInDataSize < 65536 ) ? NARRIVALS_PER_POSITION_V2_BIG : NARRIVALS_PER_POSITION_V2_SMALL ;
int nResult ;
2020-12-17 13:47:25 +00:00
int * rle_len = ( int * ) pCompressor - > intervals /* reuse */ ;
2022-01-02 06:15:23 +00:00
int i , nDidReduce , nPasses ;
2020-07-13 17:34:07 +00:00
i = 0 ;
2022-01-02 06:15:23 +00:00
while ( i < nEndOffset ) {
2020-07-13 17:34:07 +00:00
int nRangeStartIdx = i ;
2022-04-02 06:49:26 +00:00
const unsigned char c = pInWindow [ nRangeStartIdx ] ;
2020-07-13 17:34:07 +00:00
do {
i + + ;
2022-01-02 06:15:23 +00:00
} while ( i < nEndOffset & & pInWindow [ i ] = = c ) ;
2020-07-13 17:34:07 +00:00
while ( nRangeStartIdx < i ) {
2020-12-17 13:47:25 +00:00
rle_len [ nRangeStartIdx ] = i - nRangeStartIdx ;
nRangeStartIdx + + ;
2020-07-13 17:34:07 +00:00
}
}
2019-07-01 07:25:19 +00:00
2019-10-11 07:05:58 +00:00
/* Compress optimally without breaking ties in favor of less tokens */
2019-11-18 11:10:23 +00:00
memset ( pCompressor - > best_match , 0 , BLOCK_SIZE * sizeof ( lzsa_match ) ) ;
2023-01-30 12:21:43 +00:00
lzsa_optimize_forward_v2 ( pCompressor , pInWindow , nPreviousBlockSize , nEndOffset , 0 /* reduce */ , ( nInDataSize < 65536 ) ? 1 : 0 /* insert forward reps */ , nArrivalsPerPosition ) ;
2019-10-11 07:05:58 +00:00
2022-01-02 06:15:23 +00:00
if ( nInDataSize < 65536 ) {
2021-10-12 13:52:11 +00:00
int * first_offset_for_byte = pCompressor - > first_offset_for_byte ;
int * next_offset_for_pos = pCompressor - > next_offset_for_pos ;
int * offset_cache = pCompressor - > offset_cache ;
int nPosition ;
2019-10-11 07:05:58 +00:00
2021-10-12 13:52:11 +00:00
/* Supplement small matches */
2019-10-11 07:05:58 +00:00
2021-10-12 13:52:11 +00:00
memset ( first_offset_for_byte , 0xff , sizeof ( int ) * 65536 ) ;
memset ( next_offset_for_pos , 0xff , sizeof ( int ) * nInDataSize ) ;
2019-10-11 07:05:58 +00:00
2021-10-12 13:52:11 +00:00
for ( nPosition = nPreviousBlockSize ; nPosition < nEndOffset - 1 ; nPosition + + ) {
next_offset_for_pos [ nPosition - nPreviousBlockSize ] = first_offset_for_byte [ ( ( unsigned int ) pInWindow [ nPosition ] ) | ( ( ( unsigned int ) pInWindow [ nPosition + 1 ] ) < < 8 ) ] ;
first_offset_for_byte [ ( ( unsigned int ) pInWindow [ nPosition ] ) | ( ( ( unsigned int ) pInWindow [ nPosition + 1 ] ) < < 8 ) ] = nPosition ;
}
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
for ( nPosition = nPreviousBlockSize + 1 ; nPosition < ( nEndOffset - 1 ) ; nPosition + + ) {
2022-04-02 06:49:26 +00:00
const int nMaxMatchLen = ( ( nPosition + 16 ) < nEndOffset ) ? 16 : ( nEndOffset - nPosition ) ;
2021-10-12 13:52:11 +00:00
lzsa_match * match = pCompressor - > match + ( ( nPosition - nPreviousBlockSize ) < < MATCHES_PER_INDEX_SHIFT_V2 ) ;
int m = 0 , nInserted = 0 ;
int nMatchPos ;
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
while ( m < 15 & & match [ m ] . length )
m + + ;
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
for ( nMatchPos = next_offset_for_pos [ nPosition - nPreviousBlockSize ] ; m < 15 & & nMatchPos > = 0 ; nMatchPos = next_offset_for_pos [ nMatchPos - nPreviousBlockSize ] ) {
2022-04-02 06:49:26 +00:00
const int nMatchOffset = nPosition - nMatchPos ;
2021-10-12 13:52:11 +00:00
int nAlreadyExists = 0 ;
2022-04-02 06:49:26 +00:00
int nExistingMatchIdx ;
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
for ( nExistingMatchIdx = 0 ; nExistingMatchIdx < m ; nExistingMatchIdx + + ) {
if ( match [ nExistingMatchIdx ] . offset = = nMatchOffset ) {
nAlreadyExists = 1 ;
break ;
}
}
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
if ( ! nAlreadyExists ) {
int nMatchLen = 2 ;
2022-04-02 06:49:26 +00:00
while ( ( nMatchLen + 8 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 8 ) )
nMatchLen + = 8 ;
while ( ( nMatchLen + 4 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 4 ) )
2021-10-12 13:52:11 +00:00
nMatchLen + = 4 ;
2022-04-02 06:49:26 +00:00
while ( nMatchLen < nMaxMatchLen & & pInWindow [ nPosition + nMatchLen ] = = pInWindow [ nMatchPos + nMatchLen ] )
2021-10-12 13:52:11 +00:00
nMatchLen + + ;
match [ m ] . length = nMatchLen ;
match [ m ] . offset = nMatchOffset ;
m + + ;
nInserted + + ;
if ( nInserted > = 12 )
break ;
}
2020-08-18 07:13:54 +00:00
}
2021-10-12 13:52:11 +00:00
}
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
/* Supplement matches further */
memset ( offset_cache , 0xff , sizeof ( int ) * 2048 ) ;
for ( nPosition = nPreviousBlockSize + 1 ; nPosition < ( nEndOffset - 1 ) ; nPosition + + ) {
lzsa_match * match = pCompressor - > match + ( ( nPosition - nPreviousBlockSize ) < < MATCHES_PER_INDEX_SHIFT_V2 ) ;
if ( match [ 0 ] . length < 5 ) {
2022-04-02 06:49:26 +00:00
const int nMaxMatchLen = ( ( nPosition + 16 ) < nEndOffset ) ? 16 : ( nEndOffset - nPosition ) ;
2022-01-02 06:15:23 +00:00
int m = 0 , nInserted = 0 ;
2020-08-18 07:13:54 +00:00
int nMatchPos ;
2021-10-12 13:52:11 +00:00
int nMaxForwardPos = nPosition + 2 + 1 + 2 ;
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
if ( nMaxForwardPos > ( nEndOffset - 2 ) )
nMaxForwardPos = nEndOffset - 2 ;
while ( m < 46 & & match [ m ] . length ) {
offset_cache [ match [ m ] . offset & 2047 ] = nPosition ;
2020-08-18 07:13:54 +00:00
m + + ;
2021-10-12 13:52:11 +00:00
}
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
for ( nMatchPos = next_offset_for_pos [ nPosition - nPreviousBlockSize ] ; m < 46 & & nMatchPos > = 0 ; nMatchPos = next_offset_for_pos [ nMatchPos - nPreviousBlockSize ] ) {
2022-04-02 06:49:26 +00:00
const int nMatchOffset = nPosition - nMatchPos ;
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
if ( nMatchOffset < = MAX_OFFSET ) {
int nAlreadyExists = 0 ;
if ( offset_cache [ nMatchOffset & 2047 ] = = nPosition ) {
int nExistingMatchIdx ;
for ( nExistingMatchIdx = 0 ; nExistingMatchIdx < m ; nExistingMatchIdx + + ) {
if ( match [ nExistingMatchIdx ] . offset = = nMatchOffset ) {
nAlreadyExists = 1 ;
break ;
}
}
2020-08-18 07:13:54 +00:00
}
2021-10-12 13:52:11 +00:00
if ( ! nAlreadyExists ) {
int nForwardPos = nPosition + 2 ;
if ( nForwardPos > = nMatchOffset ) {
int nGotMatch = 0 ;
while ( nForwardPos < nMaxForwardPos ) {
if ( ! memcmp ( pInWindow + nForwardPos , pInWindow + nForwardPos - nMatchOffset , 2 ) ) {
nGotMatch = 1 ;
break ;
}
nForwardPos + + ;
}
if ( nGotMatch ) {
int nMatchLen = 2 ;
2022-04-02 06:49:26 +00:00
while ( ( nMatchLen + 8 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 8 ) )
nMatchLen + = 8 ;
while ( ( nMatchLen + 4 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 4 ) )
nMatchLen + = 4 ;
while ( nMatchLen < nMaxMatchLen & & pInWindow [ nPosition + nMatchLen ] = = pInWindow [ nMatchPos + nMatchLen ] )
2021-10-12 13:52:11 +00:00
nMatchLen + + ;
match [ m ] . length = nMatchLen | 0x8000 ;
match [ m ] . offset = nMatchOffset ;
m + + ;
lzsa_insert_forward_match_v2 ( pCompressor , pInWindow , nPosition , nMatchOffset , nPreviousBlockSize , nEndOffset , 8 ) ;
2022-01-02 06:15:23 +00:00
nInserted + + ;
2022-04-02 06:49:26 +00:00
if ( nInserted > = 3 )
2022-01-02 06:15:23 +00:00
break ;
2021-10-12 13:52:11 +00:00
}
}
}
}
else {
break ;
2020-08-18 07:13:54 +00:00
}
}
}
2021-10-12 13:52:11 +00:00
}
2020-08-18 07:13:54 +00:00
2021-10-12 13:52:11 +00:00
for ( nPosition = nPreviousBlockSize + 1 ; nPosition < ( nEndOffset - 1 ) ; nPosition + + ) {
lzsa_match * match = pCompressor - > match + ( ( nPosition - nPreviousBlockSize ) < < MATCHES_PER_INDEX_SHIFT_V2 ) ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
if ( match [ 0 ] . length < 8 ) {
2022-04-02 06:49:26 +00:00
const int nMaxMatchLen = ( ( nPosition + 16 ) < nEndOffset ) ? 16 : ( nEndOffset - nPosition ) ;
2021-10-12 13:52:11 +00:00
int m = 0 , nInserted = 0 ;
int nMatchPos ;
int nMaxForwardPos = nPosition + 2 + 1 + 6 ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
if ( nMaxForwardPos > ( nEndOffset - 2 ) )
nMaxForwardPos = nEndOffset - 2 ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
while ( m < 63 & & match [ m ] . length ) {
offset_cache [ match [ m ] . offset & 2047 ] = nPosition ;
m + + ;
}
2021-10-05 10:11:39 +00:00
2021-10-12 13:52:11 +00:00
for ( nMatchPos = next_offset_for_pos [ nPosition - nPreviousBlockSize ] ; m < 63 & & nMatchPos > = 0 ; nMatchPos = next_offset_for_pos [ nMatchPos - nPreviousBlockSize ] ) {
2022-04-02 06:49:26 +00:00
const int nMatchOffset = nPosition - nMatchPos ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
if ( nMatchOffset < = MAX_OFFSET ) {
int nAlreadyExists = 0 ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
if ( offset_cache [ nMatchOffset & 2047 ] = = nPosition ) {
int nExistingMatchIdx ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
for ( nExistingMatchIdx = 0 ; nExistingMatchIdx < m ; nExistingMatchIdx + + ) {
if ( match [ nExistingMatchIdx ] . offset = = nMatchOffset ) {
nAlreadyExists = 1 ;
break ;
}
}
}
if ( ! nAlreadyExists ) {
int nForwardPos = nPosition + 2 ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
if ( nForwardPos > = nMatchOffset ) {
int nGotMatch = 0 ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
while ( nForwardPos < nMaxForwardPos ) {
if ( ! memcmp ( pInWindow + nForwardPos , pInWindow + nForwardPos - nMatchOffset , 2 ) ) {
nGotMatch = 1 ;
2021-06-04 17:11:22 +00:00
break ;
}
2021-10-12 13:52:11 +00:00
nForwardPos + + ;
2021-06-04 17:11:22 +00:00
}
2021-10-12 13:52:11 +00:00
if ( nGotMatch ) {
int nMatchLen = 2 ;
2022-04-02 06:49:26 +00:00
while ( ( nMatchLen + 8 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 8 ) )
nMatchLen + = 8 ;
while ( ( nMatchLen + 4 ) < nMaxMatchLen & & ! memcmp ( pInWindow + nPosition + nMatchLen , pInWindow + nMatchPos + nMatchLen , 4 ) )
nMatchLen + = 4 ;
while ( nMatchLen < nMaxMatchLen & & pInWindow [ nPosition + nMatchLen ] = = pInWindow [ nMatchPos + nMatchLen ] )
2021-10-12 13:52:11 +00:00
nMatchLen + + ;
2021-10-05 10:11:39 +00:00
2021-10-12 13:52:11 +00:00
match [ m ] . length = nMatchLen ;
match [ m ] . offset = nMatchOffset ;
m + + ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
lzsa_insert_forward_match_v2 ( pCompressor , pInWindow , nPosition , nMatchOffset , nPreviousBlockSize , nEndOffset , 8 ) ;
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
nInserted + + ;
2022-04-02 06:49:26 +00:00
if ( nInserted > = 12 )
2021-10-05 10:11:39 +00:00
break ;
2021-06-04 17:11:22 +00:00
}
}
}
2021-10-12 13:52:11 +00:00
}
else {
break ;
2021-06-04 17:11:22 +00:00
}
}
}
2021-10-12 13:52:11 +00:00
}
2021-06-04 17:11:22 +00:00
2021-10-12 13:52:11 +00:00
/* Compress optimally and do break ties in favor of less tokens */
2023-01-30 12:21:43 +00:00
lzsa_optimize_forward_v2 ( pCompressor , pInWindow , nPreviousBlockSize , nEndOffset , 1 /* reduce */ , 0 /* use forward reps */ , NARRIVALS_PER_POSITION_V2_MAX ) ;
2019-10-11 07:05:58 +00:00
}
2022-01-02 06:15:23 +00:00
/* Try to reduce final command set, wherever possible */
nPasses = 0 ;
do {
2023-01-30 12:21:43 +00:00
nDidReduce = lzsa_optimize_command_count_v2 ( pCompressor , pInWindow , nPreviousBlockSize , nEndOffset ) ;
2022-01-02 06:15:23 +00:00
nPasses + + ;
} while ( nDidReduce & & nPasses < 20 ) ;
/* Write compressed block */
2023-01-30 12:21:43 +00:00
nResult = lzsa_write_block_v2 ( pCompressor , pInWindow , nPreviousBlockSize , nEndOffset , pOutData , nMaxOutDataSize ) ;
2022-04-19 07:18:09 +00:00
if ( nResult < 0 & & ( pCompressor - > flags & LZSA_FLAG_RAW_BLOCK ) ) {
2023-01-30 12:21:43 +00:00
nResult = lzsa_write_raw_uncompressed_block_v2 ( pCompressor , pInWindow , nPreviousBlockSize , nEndOffset , pOutData , nMaxOutDataSize ) ;
2019-07-01 07:25:19 +00:00
}
2019-06-07 21:15:40 +00:00
2019-07-01 07:25:19 +00:00
return nResult ;
2019-06-07 21:15:40 +00:00
}