mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-04-06 05:41:29 +00:00
Add context to libdivsufsort, don't allocate memory during compression
This commit is contained in:
parent
e24320b23b
commit
6aa2dae4b3
@ -50,17 +50,38 @@ typedef int saidx_t;
|
||||
#define PRIdSAIDX_T "d"
|
||||
#endif
|
||||
|
||||
/*- divsufsort context */
|
||||
typedef struct _divsufsort_ctx_t {
|
||||
saidx_t *bucket_A;
|
||||
saidx_t *bucket_B;
|
||||
} divsufsort_ctx_t;
|
||||
|
||||
/*- Prototypes -*/
|
||||
|
||||
/**
|
||||
* Initialize suffix array context
*
* @param ctx suffix array context to initialize
*
* @return 0 for success, or non-zero in case of an error
|
||||
*/
|
||||
int divsufsort_init(divsufsort_ctx_t *ctx);
|
||||
|
||||
/**
|
||||
* Destroy suffix array context
|
||||
*
|
||||
* @param ctx suffix array context to destroy
|
||||
*/
|
||||
void divsufsort_destroy(divsufsort_ctx_t *ctx);
|
||||
|
||||
/**
|
||||
* Constructs the suffix array of a given string.
|
||||
* @param ctx suffix array context
|
||||
* @param T[0..n-1] The input string.
|
||||
* @param SA[0..n-1] The output array of suffixes.
|
||||
* @param n The length of the given string.
|
||||
* @return 0 if no error occurred, -1 or -2 otherwise.
|
||||
*/
|
||||
DIVSUFSORT_API
|
||||
saint_t divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n);
|
||||
saint_t divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n);
|
||||
|
||||
#if 0
|
||||
/**
|
||||
|
@ -327,11 +327,47 @@ construct_BWT(const sauchar_t *T, saidx_t *SA,
|
||||
|
||||
/*---------------------------------------------------------------------------*/
|
||||
|
||||
/**
|
||||
* Initialize suffix array context
|
||||
*
|
||||
* @return 0 for success, or non-zero in case of an error
|
||||
*/
|
||||
int divsufsort_init(divsufsort_ctx_t *ctx) {
|
||||
ctx->bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
|
||||
ctx->bucket_B = NULL;
|
||||
|
||||
if (ctx->bucket_A) {
|
||||
ctx->bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
|
||||
|
||||
if (ctx->bucket_B)
|
||||
return 0;
|
||||
}
|
||||
|
||||
divsufsort_destroy(ctx);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/**
|
||||
* Destroy suffix array context
|
||||
*
|
||||
* @param ctx suffix array context to destroy
|
||||
*/
|
||||
void divsufsort_destroy(divsufsort_ctx_t *ctx) {
|
||||
if (ctx->bucket_B) {
|
||||
free(ctx->bucket_B);
|
||||
ctx->bucket_B = NULL;
|
||||
}
|
||||
|
||||
if (ctx->bucket_A) {
|
||||
free(ctx->bucket_A);
|
||||
ctx->bucket_A = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/*- Function -*/
|
||||
|
||||
saint_t
|
||||
divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
|
||||
saidx_t *bucket_A, *bucket_B;
|
||||
divsufsort_build_array(divsufsort_ctx_t *ctx, const sauchar_t *T, saidx_t *SA, saidx_t n) {
|
||||
saidx_t m;
|
||||
saint_t err = 0;
|
||||
|
||||
@ -341,20 +377,14 @@ divsufsort(const sauchar_t *T, saidx_t *SA, saidx_t n) {
|
||||
else if(n == 1) { SA[0] = 0; return 0; }
|
||||
else if(n == 2) { m = (T[0] < T[1]); SA[m ^ 1] = 0, SA[m] = 1; return 0; }
|
||||
|
||||
bucket_A = (saidx_t *)malloc(BUCKET_A_SIZE * sizeof(saidx_t));
|
||||
bucket_B = (saidx_t *)malloc(BUCKET_B_SIZE * sizeof(saidx_t));
|
||||
|
||||
/* Suffixsort. */
|
||||
if((bucket_A != NULL) && (bucket_B != NULL)) {
|
||||
m = sort_typeBstar(T, SA, bucket_A, bucket_B, n);
|
||||
construct_SA(T, SA, bucket_A, bucket_B, n, m);
|
||||
if((ctx->bucket_A != NULL) && (ctx->bucket_B != NULL)) {
|
||||
m = sort_typeBstar(T, SA, ctx->bucket_A, ctx->bucket_B, n);
|
||||
construct_SA(T, SA, ctx->bucket_A, ctx->bucket_B, n, m);
|
||||
} else {
|
||||
err = -2;
|
||||
}
|
||||
|
||||
free(bucket_B);
|
||||
free(bucket_A);
|
||||
|
||||
return err;
|
||||
}
|
||||
|
||||
|
43
src/shrink.c
43
src/shrink.c
@ -4,6 +4,7 @@
|
||||
* The following copying information applies to this specific source code file:
|
||||
*
|
||||
* Written in 2019 by Emmanuel Marty <marty.emmanuel@gmail.com>
|
||||
* With help, ideas, optimizations and speed measurements by spke <zxintrospec@gmail.com>
|
||||
* Portions written in 2014-2015 by Eric Biggers <ebiggers3@gmail.com>
|
||||
*
|
||||
* To the extent possible under law, the author(s) have dedicated all copyright
|
||||
@ -24,12 +25,12 @@
|
||||
*
|
||||
* Inspired by LZ4 by Yann Collet. https://github.com/lz4/lz4
|
||||
* With ideas from Lizard by Przemyslaw Skibinski and Yann Collet. https://github.com/inikep/lizard
|
||||
*
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "divsufsort.h"
|
||||
#include "shrink.h"
|
||||
#include "format.h"
|
||||
|
||||
@ -62,36 +63,35 @@ typedef struct _lzsa_match {
|
||||
* @return 0 for success, non-zero for failure
|
||||
*/
|
||||
int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize) {
|
||||
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
int nResult;
|
||||
|
||||
nResult = divsufsort_init(&pCompressor->divsufsort_context);
|
||||
pCompressor->intervals = NULL;
|
||||
pCompressor->pos_data = NULL;
|
||||
pCompressor->open_intervals = NULL;
|
||||
pCompressor->match = NULL;
|
||||
pCompressor->num_commands = 0;
|
||||
|
||||
if (pCompressor->intervals) {
|
||||
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
if (!nResult) {
|
||||
pCompressor->intervals = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->pos_data) {
|
||||
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
|
||||
if (pCompressor->intervals) {
|
||||
pCompressor->pos_data = (unsigned int *)malloc(nMaxWindowSize * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->open_intervals) {
|
||||
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
|
||||
if (pCompressor->pos_data) {
|
||||
pCompressor->open_intervals = (unsigned int *)malloc((LCP_MAX + 1) * sizeof(unsigned int));
|
||||
|
||||
if (pCompressor->match)
|
||||
return 0;
|
||||
if (pCompressor->open_intervals) {
|
||||
pCompressor->match = (lzsa_match *)malloc(nMaxWindowSize * NMATCHES_PER_OFFSET * sizeof(lzsa_match));
|
||||
|
||||
free(pCompressor->open_intervals);
|
||||
pCompressor->open_intervals = NULL;
|
||||
if (pCompressor->match)
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
free(pCompressor->pos_data);
|
||||
pCompressor->pos_data = NULL;
|
||||
}
|
||||
|
||||
free(pCompressor->intervals);
|
||||
pCompressor->intervals = NULL;
|
||||
}
|
||||
|
||||
lzsa_compressor_destroy(pCompressor);
|
||||
return 100;
|
||||
}
|
||||
|
||||
@ -101,6 +101,8 @@ int lzsa_compressor_init(lsza_compressor *pCompressor, const int nMaxWindowSize)
|
||||
* @param pCompressor compression context to clean up
|
||||
*/
|
||||
void lzsa_compressor_destroy(lsza_compressor *pCompressor) {
|
||||
divsufsort_destroy(&pCompressor->divsufsort_context);
|
||||
|
||||
if (pCompressor->match) {
|
||||
free(pCompressor->match);
|
||||
pCompressor->match = NULL;
|
||||
@ -135,7 +137,7 @@ static int lzsa_build_suffix_array(lsza_compressor *pCompressor, const unsigned
|
||||
unsigned int *intervals = pCompressor->intervals;
|
||||
|
||||
/* Build suffix array from input data */
|
||||
if (divsufsort(pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
|
||||
if (divsufsort_build_array(&pCompressor->divsufsort_context, pInWindow, (saidx_t*)intervals, nInWindowSize) != 0) {
|
||||
return 100;
|
||||
}
|
||||
|
||||
@ -700,7 +702,8 @@ static int lzsa_write_block(lsza_compressor *pCompressor, const unsigned char *p
|
||||
* @return size of compressed data in output buffer, or -1 if the data is uncompressible
|
||||
*/
|
||||
int lzsa_shrink_block(lsza_compressor *pCompressor, const unsigned char *pInWindow, const int nPreviousBlockSize, const int nInDataSize, unsigned char *pOutData, const int nMaxOutDataSize) {
|
||||
lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize);
|
||||
if (lzsa_build_suffix_array(pCompressor, pInWindow, nPreviousBlockSize + nInDataSize))
|
||||
return -1;
|
||||
if (nPreviousBlockSize) {
|
||||
lzsa_skip_matches(pCompressor, 0, nPreviousBlockSize);
|
||||
}
|
||||
|
@ -23,11 +23,14 @@
|
||||
#ifndef _SHRINK_H
|
||||
#define _SHRINK_H
|
||||
|
||||
#include "divsufsort.h"
|
||||
|
||||
/* Forward declarations */
|
||||
typedef struct _lzsa_match lzsa_match;
|
||||
|
||||
/** Compression context */
|
||||
typedef struct {
|
||||
divsufsort_ctx_t divsufsort_context;
|
||||
unsigned int *intervals;
|
||||
unsigned int *pos_data;
|
||||
unsigned int *open_intervals;
|
||||
|
Loading…
x
Reference in New Issue
Block a user