Add context to libdivsufsort, don't allocate memory during compression

This commit is contained in:
emmanuel-marty 2019-04-07 00:01:22 +02:00
parent e24320b23b
commit 6aa2dae4b3
4 changed files with 89 additions and 32 deletions

View File

@ -50,17 +50,38 @@ typedef int saidx_t;
#define PRIdSAIDX_T "d"
#endif
/*- divsufsort context */
/*
 * Scratch buffers used while building a suffix array. They are allocated
 * once by divsufsort_init(), handed to the sorting passes by
 * divsufsort_build_array(), and released by divsufsort_destroy(), so that
 * building an array does not allocate memory itself.
 */
typedef struct _divsufsort_ctx_t {
/* Bucket table of BUCKET_A_SIZE entries (see divsufsort_init()). */
saidx_t *bucket_A;
/* Bucket table of BUCKET_B_SIZE entries (see divsufsort_init()). */
saidx_t *bucket_B;
} divsufsort_ctx_t;
/*- Prototypes -*/
/**
 * Initialize suffix array context
 *
 * Allocates the two bucket tables stored in the context. If either
 * allocation fails, anything already allocated is released again and both
 * pointers are left NULL.
 *
 * @param ctx suffix array context to initialize (must not be NULL)
 *
 * @return 0 for success, or non-zero in case of an error
 */
int divsufsort_init(divsufsort_ctx_t *ctx);
/**
 * Destroy suffix array context
 *
 * Frees the bucket tables held by the context and resets both pointers to
 * NULL. The context structure itself is not freed.
 *
 * @param ctx suffix array context to destroy
 */
void divsufsort_destroy(divsufsort_ctx_t *ctx);
/**
* Constructs the suffix array of a given string.
* @param ctx suffix array context
* @param T[0..n-1] The input string.
* @param SA[0..n-1] The output array of suffixes.
* @param n The length of the given string.
* @return 0 if no error occurred, -1 or -2 otherwise.
*/
DIVSUFSORT_API
saint_t divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
#if 0
/**

View File

@ -327,11 +327,47 @@ construct_BWT(const sauchar_t *T, saidx_t *SA,
/*---------------------------------------------------------------------------*/
/**
 * Initialize suffix array context
 *
 * Allocates bucket_A first and, only if that succeeds, bucket_B. On any
 * allocation failure the context is passed to divsufsort_destroy(), which
 * releases whatever was obtained and leaves both pointers NULL.
 *
 * @param ctx suffix array context to initialize (must not be NULL)
 *
 * @return 0 for success, or non-zero in case of an error
 */
int divsufsort_init(divsufsort_ctx_t *ctx) {
    /* Start from a known state so the failure path can free unconditionally. */
    ctx->bucket_B = NULL;
    ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));

    if (ctx->bucket_A != NULL) {
        ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
    }

    /* bucket_B can only be non-NULL when bucket_A was allocated as well. */
    if (ctx->bucket_B != NULL) {
        return 0;
    }

    divsufsort_destroy(ctx);
    return -1;
}
/**
 * Destroy suffix array context
 *
 * Releases both bucket tables and resets the pointers to NULL. Pointers
 * that are already NULL are handled, so a partially initialized context
 * can be passed in. The context structure itself is not freed.
 *
 * @param ctx suffix array context to destroy
 */
void divsufsort_destroy(divsufsort_ctx_t *ctx) {
    /* free(NULL) is a no-op, so no per-pointer guard is required. */
    free(ctx->bucket_B);
    ctx->bucket_B = NULL;

    free(ctx->bucket_A);
    ctx->bucket_A = NULL;
}
/*- Function -*/
saint_t
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
saidx_t *bucket_A, *bucket_B;
divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
saidx_t m;
saint_t err = 0;
@ -341,20 +377,14 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
else if(n == 1) { SA[0] = 0; return 0; }
else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
/* Suffixsort. */
if((bucket_A != NULL) && (bucket_B != NULL)) {
m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
construct_SA(T, SA, bucket_A, bucket_B, n, m);
if((ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) {
m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
} else {
err = -2;
}
free(bucket_B);
free(bucket_A);
return err;
}

View File

@ -4,6 +4,7 @@
* The following copying information applies to this specific source code file:
*
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
*
* To the extent possible under law, the author(s) have dedicated all copyright
@ -24,12 +25,12 @@
*
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
*
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "divsufsort.h"
#include "shrink.h"
#include "format.h"
@ -62,36 +63,35 @@ typedef struct _lzsa_match {
* @return 0 for success, non-zero for failure
*/
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
int nResult;
nResult = divsufsort_init(&pCompressor->divsufsort_context);
pCompressor->intervals = NULL;
pCompressor->pos_data = NULL;
pCompressor->open_intervals = NULL;
pCompressor->match = NULL;
pCompressor->num_commands = 0;
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (!nResult) {
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
if (pCompressor->intervals) {
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
if (pCompressor->pos_data) {
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
if (pCompressor->match)
return 0;
if (pCompressor->open_intervals) {
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
free(pCompressor->open_intervals);
pCompressor->open_intervals = NULL;
if (pCompressor->match)
return 0;
}
}
free(pCompressor->pos_data);
pCompressor->pos_data = NULL;
}
free(pCompressor->intervals);
pCompressor->intervals = NULL;
}
lzsa_compressor_destroy(pCompressor);
return 100;
}
@ -101,6 +101,8 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize)
* @param pCompressor compression context to clean up
*/
void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
divsufsort_destroy(&pCompressor->divsufsort_context);
if (pCompressor->match) {
free(pCompressor->match);
pCompressor->match = NULL;
@ -135,7 +137,7 @@ static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned
unsigned int *intervals = pCompressor->intervals;
/* Build suffix array from input data */
if (divsufsort(pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
return 100;
}
@ -700,7 +702,8 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
*/
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize);
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
return -1;
if (nPreviousBlockSize) {
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
}

View File

@ -23,11 +23,14 @@
#ifndef _SHRINK_H
#define _SHRINK_H
#include "divsufsort.h"
/* Forward declarations */
typedef struct _lzsa_match lzsa_match;
/** Compression context */
typedef struct {
divsufsort_ctx_t divsufsort_context;
unsigned int *intervals;
unsigned int *pos_data;
unsigned int *open_intervals;