From 6aa2dae4b3f818f419c99141f126577941845edb Mon Sep 17 00:00:00 2001 From: emmanuel-marty Date: Sun, 7 Apr 2019 00:01:22 +0200 Subject: [PATCH] Add context to libdivsufsort, don't allocate memory during compression --- src/libdivsufsort/include/divsufsort.h | 23 +++++++++++- src/libdivsufsort/lib/divsufsort.c | 52 ++++++++++++++++++++------ src/shrink.c | 43 +++++++++++---------- src/shrink.h | 3 ++ 4 files changed, 89 insertions(+), 32 deletions(-) diff --git a/src/libdivsufsort/include/divsufsort.h b/src/libdivsufsort/include/divsufsort.h index 86caf01..7ebb412 100755 --- a/src/libdivsufsort/include/divsufsort.h +++ b/src/libdivsufsort/include/divsufsort.h @@ -50,17 +50,38 @@ typedef int saidx_t; #define PRIdSAIDX_T "d" #endif +/*- divsufsort context */ +typedef struct _divsufsort_ctx_t { + saidx_t *bucket_A; + saidx_t *bucket_B; +} divsufsort_ctx_t; + /*- Prototypes -*/ +/** + * Initialize suffix array context + * + * @return 0 for success, or non-zero in case of an error + */ +int divsufsort_init(divsufsort_ctx_t *ctx); + +/** + * Destroy suffix array context + * + * @param ctx suffix array context to destroy + */ +void divsufsort_destroy(divsufsort_ctx_t *ctx); + /** * Constructs the suffix array of a given string. + * @param ctx suffix array context * @param T[0..n-1] The input string. * @param SA[0..n-1] The output array of suffixes. * @param n The length of the given string. * @return 0 if no error occurred, -1 or -2 otherwise. */ DIVSUFSORT_API -saint_t divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n); +saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n); #if 0 /** diff --git a/src/libdivsufsort/lib/divsufsort.c b/src/libdivsufsort/lib/divsufsort.c index e03b2c4..50631ac 100755 --- a/src/libdivsufsort/lib/divsufsort.c +++ b/src/libdivsufsort/lib/divsufsort.c @@ -327,11 +327,47 @@ construct_BWT(const sauchar_t *T, saidx_t *SA, /*---------------------------------------------------------------------------*/ +/** + * Initialize suffix array context + * + * @return 0 for success, or non-zero in case of an error + */ +int divsufsort_init(divsufsort_ctx_t *ctx) { + ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); + ctx->bucket_B = NULL; + + if (ctx->bucket_A) { + ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); + + if (ctx->bucket_B) + return 0; + } + + divsufsort_destroy(ctx); + return -1; +} + +/** + * Destroy suffix array context + * + * @param ctx suffix array context to destroy + */ +void divsufsort_destroy(divsufsort_ctx_t *ctx) { + if (ctx->bucket_B) { + free(ctx->bucket_B); + ctx->bucket_B = NULL; + } + + if (ctx->bucket_A) { + free(ctx->bucket_A); + ctx->bucket_A = NULL; + } +} + /*- Function -*/ saint_t -divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { - saidx_t *bucket_A, *bucket_B; +divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) { saidx_t m; saint_t err = 0; @@ -341,20 +377,14 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) { else if(n == 1) { SA[0] = 0; return 0; } else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; } - bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t)); - bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t)); - /* Suffixsort. */ - if((bucket_A != NULL) && (bucket_B != NULL)) { - m = sort_typeBstar(T, SA, bucket_A, bucket_B, n); - construct_SA(T, SA, bucket_A, bucket_B, n, m); + if((ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) { + m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n); + construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m); } else { err = -2; } - free(bucket_B); - free(bucket_A); - return err; } diff --git a/src/shrink.c b/src/shrink.c index 37c12d9..b92cd7a 100755 --- a/src/shrink.c +++ b/src/shrink.c @@ -4,6 +4,7 @@ * The following copying information applies to this specific source code file: * * Written in 2019 by Emmanuel Marty + * With help, ideas, optimizations and speed measurements by spke * Portions written in 2014-2015 by Eric Biggers * * To the extent possible under law, the author(s) have dedicated all copyright @@ -24,12 +25,12 @@ * * Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4 * With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard + * */ #include #include #include -#include "divsufsort.h" #include "shrink.h" #include "format.h" @@ -62,36 +63,35 @@ typedef struct _lzsa_match { * @return 0 for success, non-zero for failure */ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) { - pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + int nResult; + + nResult = divsufsort_init(&pCompressor->divsufsort_context); + pCompressor->intervals = NULL; pCompressor->pos_data = NULL; pCompressor->open_intervals = NULL; pCompressor->match = NULL; pCompressor->num_commands = 0; - if (pCompressor->intervals) { - pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); + if (!nResult) { + pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); - if (pCompressor->pos_data) { - pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int)); + if (pCompressor->intervals) { + pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int)); - if (pCompressor->open_intervals) { - pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match)); + if (pCompressor->pos_data) { + pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int)); - if (pCompressor->match) - return 0; + if (pCompressor->open_intervals) { + pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match)); - free(pCompressor->open_intervals); - pCompressor->open_intervals = NULL; + if (pCompressor->match) + return 0; + } } - - free(pCompressor->pos_data); - pCompressor->pos_data = NULL; } - - free(pCompressor->intervals); - pCompressor->intervals = NULL; } + lzsa_compressor_destroy(pCompressor); return 100; } @@ -101,6 +101,8 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) * @param pCompressor compression context to clean up */ void lzsa_compressor_destroy(lsza_compressor *pCompressor) { + divsufsort_destroy(&pCompressor->divsufsort_context); + if (pCompressor->match) { free(pCompressor->match); pCompressor->match = NULL; @@ -135,7 +137,7 @@ static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned unsigned int *intervals = pCompressor->intervals; /* Build suffix array from input data */ - if (divsufsort(pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) { + if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) { return 100; } @@ -700,7 +702,8 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p * @return size of compressed data in output buffer, or -1 if the data is uncompressible */ int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) { - lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize); + if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize)) + return -1; if (nPreviousBlockSize) { lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize); } diff --git a/src/shrink.h b/src/shrink.h index ae6063a..7487fc8 100755 --- a/src/shrink.h +++ b/src/shrink.h @@ -23,11 +23,14 @@ #ifndef _SHRINK_H #define _SHRINK_H +#include "divsufsort.h" + /* Forward declarations */ typedef struct _lzsa_match lzsa_match; /** Compression context */ typedef struct { + divsufsort_ctx_t divsufsort_context; unsigned int *intervals; unsigned int *pos_data; unsigned int *open_intervals;