mirror of
https://github.com/emmanuel-marty/lzsa.git
synced 2025-04-07 11:39:24 +00:00
Moved
This commit is contained in:
parent
79ed7bf91e
commit
3008d47012
@ -1,383 +0,0 @@
|
||||
/*
|
||||
* utils.c for libdivsufsort
|
||||
* Copyright (c) 2003-2008 Yuta Mori All Rights Reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person
|
||||
* obtaining a copy of this software and associated documentation
|
||||
* files (the "Software"), to deal in the Software without
|
||||
* restriction, including without limitation the rights to use,
|
||||
* copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
* copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following
|
||||
* conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
||||
* OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
||||
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
||||
* HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
||||
* WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
||||
* OTHER DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#include "divsufsort_private.h"
|
||||
|
||||
|
||||
/*- Private Function -*/
|
||||
|
||||
#if 0
|
||||
/* Binary search for inverse bwt. */
|
||||
static
|
||||
saidx_t
|
||||
binarysearch_lower(const saidx_t *A, saidx_t size, saidx_t value) {
|
||||
saidx_t half, i;
|
||||
for(i = 0, half = size >> 1;
|
||||
0 < size;
|
||||
size = half, half >>= 1) {
|
||||
if(A[i + half] < value) {
|
||||
i += half + 1;
|
||||
half -= (size & 1) ^ 1;
|
||||
}
|
||||
}
|
||||
return i;
|
||||
}
|
||||
|
||||
|
||||
/*- Functions -*/
|
||||
|
||||
/* Burrows-Wheeler transform. */
|
||||
saint_t
|
||||
bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *SA,
|
||||
saidx_t n, saidx_t *idx) {
|
||||
saidx_t *A, i, j, p, t;
|
||||
saint_t c;
|
||||
|
||||
/* Check arguments. */
|
||||
if((T == NULL) || (U == NULL) || (n < 0) || (idx == NULL)) { return -1; }
|
||||
if(n <= 1) {
|
||||
if(n == 1) { U[0] = T[0]; }
|
||||
*idx = n;
|
||||
return 0;
|
||||
}
|
||||
|
||||
if((A = SA) == NULL) {
|
||||
i = divbwt(T, U, NULL, n);
|
||||
if(0 <= i) { *idx = i; i = 0; }
|
||||
return (saint_t)i;
|
||||
}
|
||||
|
||||
/* BW transform. */
|
||||
if(T == U) {
|
||||
t = n;
|
||||
for(i = 0, j = 0; i < n; ++i) {
|
||||
p = t - 1;
|
||||
t = A[i];
|
||||
if(0 <= p) {
|
||||
c = T[j];
|
||||
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
|
||||
A[j] = c;
|
||||
j++;
|
||||
} else {
|
||||
*idx = i;
|
||||
}
|
||||
}
|
||||
p = t - 1;
|
||||
if(0 <= p) {
|
||||
c = T[j];
|
||||
U[j] = (j <= p) ? T[p] : (sauchar_t)A[p];
|
||||
A[j] = c;
|
||||
} else {
|
||||
*idx = i;
|
||||
}
|
||||
} else {
|
||||
U[0] = T[n - 1];
|
||||
for(i = 0; A[i] != 0; ++i) { U[i + 1] = T[A[i] - 1]; }
|
||||
*idx = i + 1;
|
||||
for(++i; i < n; ++i) { U[i] = T[A[i] - 1]; }
|
||||
}
|
||||
|
||||
if(SA == NULL) {
|
||||
/* Deallocate memory. */
|
||||
free(A);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Inverse Burrows-Wheeler transform. */
|
||||
saint_t
|
||||
inverse_bw_transform(const sauchar_t *T, sauchar_t *U, saidx_t *A,
|
||||
saidx_t n, saidx_t idx) {
|
||||
saidx_t C[ALPHABET_SIZE];
|
||||
sauchar_t D[ALPHABET_SIZE];
|
||||
saidx_t *B;
|
||||
saidx_t i, p;
|
||||
saint_t c, d;
|
||||
|
||||
/* Check arguments. */
|
||||
if((T == NULL) || (U == NULL) || (n < 0) || (idx < 0) ||
|
||||
(n < idx) || ((0 < n) && (idx == 0))) {
|
||||
return -1;
|
||||
}
|
||||
if(n <= 1) { return 0; }
|
||||
|
||||
if((B = A) == NULL) {
|
||||
/* Allocate n*sizeof(saidx_t) bytes of memory. */
|
||||
if((B = (saidx_t *)malloc((size_t)n * sizeof(saidx_t))) == NULL) { return -2; }
|
||||
}
|
||||
|
||||
/* Inverse BW transform. */
|
||||
for(c = 0; c < ALPHABET_SIZE; ++c) { C[c] = 0; }
|
||||
for(i = 0; i < n; ++i) { ++C[T[i]]; }
|
||||
for(c = 0, d = 0, i = 0; c < ALPHABET_SIZE; ++c) {
|
||||
p = C[c];
|
||||
if(0 < p) {
|
||||
C[c] = i;
|
||||
D[d++] = (sauchar_t)c;
|
||||
i += p;
|
||||
}
|
||||
}
|
||||
for(i = 0; i < idx; ++i) { B[C[T[i]]++] = i; }
|
||||
for( ; i < n; ++i) { B[C[T[i]]++] = i + 1; }
|
||||
for(c = 0; c < d; ++c) { C[c] = C[D[c]]; }
|
||||
for(i = 0, p = idx; i < n; ++i) {
|
||||
U[i] = D[binarysearch_lower(C, d, p)];
|
||||
p = B[p - 1];
|
||||
}
|
||||
|
||||
if(A == NULL) {
|
||||
/* Deallocate memory. */
|
||||
free(B);
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* Checks the suffix array SA of the string T. */
|
||||
saint_t
|
||||
sufcheck(const sauchar_t *T, const saidx_t *SA,
|
||||
saidx_t n, saint_t verbose) {
|
||||
saidx_t C[ALPHABET_SIZE];
|
||||
saidx_t i, p, q, t;
|
||||
saint_t c;
|
||||
|
||||
if(verbose) { fprintf(stderr, "sufcheck: "); }
|
||||
|
||||
/* Check arguments. */
|
||||
if((T == NULL) || (SA == NULL) || (n < 0)) {
|
||||
if(verbose) { fprintf(stderr, "Invalid arguments.\n"); }
|
||||
return -1;
|
||||
}
|
||||
if(n == 0) {
|
||||
if(verbose) { fprintf(stderr, "Done.\n"); }
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* check range: [0..n-1] */
|
||||
for(i = 0; i < n; ++i) {
|
||||
if((SA[i] < 0) || (n <= SA[i])) {
|
||||
if(verbose) {
|
||||
fprintf(stderr, "Out of the range [0,%" PRIdSAIDX_T "].\n"
|
||||
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
|
||||
n - 1, i, SA[i]);
|
||||
}
|
||||
return -2;
|
||||
}
|
||||
}
|
||||
|
||||
/* check first characters. */
|
||||
for(i = 1; i < n; ++i) {
|
||||
if(T[SA[i - 1]] > T[SA[i]]) {
|
||||
if(verbose) {
|
||||
fprintf(stderr, "Suffixes in wrong order.\n"
|
||||
" T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d"
|
||||
" > T[SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "]=%d\n",
|
||||
i - 1, SA[i - 1], T[SA[i - 1]], i, SA[i], T[SA[i]]);
|
||||
}
|
||||
return -3;
|
||||
}
|
||||
}
|
||||
|
||||
/* check suffixes. */
|
||||
for(i = 0; i < ALPHABET_SIZE; ++i) { C[i] = 0; }
|
||||
for(i = 0; i < n; ++i) { ++C[T[i]]; }
|
||||
for(i = 0, p = 0; i < ALPHABET_SIZE; ++i) {
|
||||
t = C[i];
|
||||
C[i] = p;
|
||||
p += t;
|
||||
}
|
||||
|
||||
q = C[T[n - 1]];
|
||||
C[T[n - 1]] += 1;
|
||||
for(i = 0; i < n; ++i) {
|
||||
p = SA[i];
|
||||
if(0 < p) {
|
||||
c = T[--p];
|
||||
t = C[c];
|
||||
} else {
|
||||
c = T[p = n - 1];
|
||||
t = q;
|
||||
}
|
||||
if((t < 0) || (p != SA[t])) {
|
||||
if(verbose) {
|
||||
fprintf(stderr, "Suffix in wrong position.\n"
|
||||
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T " or\n"
|
||||
" SA[%" PRIdSAIDX_T "]=%" PRIdSAIDX_T "\n",
|
||||
t, (0 <= t) ? SA[t] : -1, i, SA[i]);
|
||||
}
|
||||
return -4;
|
||||
}
|
||||
if(t != q) {
|
||||
++C[c];
|
||||
if((n <= C[c]) || (T[SA[C[c]]] != c)) { C[c] = -1; }
|
||||
}
|
||||
}
|
||||
|
||||
if(1 <= verbose) { fprintf(stderr, "Done.\n"); }
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
static
|
||||
int
|
||||
_compare(const sauchar_t *T, saidx_t Tsize,
|
||||
const sauchar_t *P, saidx_t Psize,
|
||||
saidx_t suf, saidx_t *match) {
|
||||
saidx_t i, j;
|
||||
saint_t r;
|
||||
for(i = suf + *match, j = *match, r = 0;
|
||||
(i < Tsize) && (j < Psize) && ((r = T[i] - P[j]) == 0); ++i, ++j) { }
|
||||
*match = j;
|
||||
return (r == 0) ? -(j != Psize) : r;
|
||||
}
|
||||
|
||||
/* Search for the pattern P in the string T. */
|
||||
saidx_t
|
||||
sa_search(const sauchar_t *T, saidx_t Tsize,
|
||||
const sauchar_t *P, saidx_t Psize,
|
||||
const saidx_t *SA, saidx_t SAsize,
|
||||
saidx_t *idx) {
|
||||
saidx_t size, lsize, rsize, half;
|
||||
saidx_t match, lmatch, rmatch;
|
||||
saidx_t llmatch, lrmatch, rlmatch, rrmatch;
|
||||
saidx_t i, j, k;
|
||||
saint_t r;
|
||||
|
||||
if(idx != NULL) { *idx = -1; }
|
||||
if((T == NULL) || (P == NULL) || (SA == NULL) ||
|
||||
(Tsize < 0) || (Psize < 0) || (SAsize < 0)) { return -1; }
|
||||
if((Tsize == 0) || (SAsize == 0)) { return 0; }
|
||||
if(Psize == 0) { if(idx != NULL) { *idx = 0; } return SAsize; }
|
||||
|
||||
for(i = j = k = 0, lmatch = rmatch = 0, size = SAsize, half = size >> 1;
|
||||
0 < size;
|
||||
size = half, half >>= 1) {
|
||||
match = MIN(lmatch, rmatch);
|
||||
r = _compare(T, Tsize, P, Psize, SA[i + half], &match);
|
||||
if(r < 0) {
|
||||
i += half + 1;
|
||||
half -= (size & 1) ^ 1;
|
||||
lmatch = match;
|
||||
} else if(r > 0) {
|
||||
rmatch = match;
|
||||
} else {
|
||||
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
|
||||
|
||||
/* left part */
|
||||
for(llmatch = lmatch, lrmatch = match, half = lsize >> 1;
|
||||
0 < lsize;
|
||||
lsize = half, half >>= 1) {
|
||||
lmatch = MIN(llmatch, lrmatch);
|
||||
r = _compare(T, Tsize, P, Psize, SA[j + half], &lmatch);
|
||||
if(r < 0) {
|
||||
j += half + 1;
|
||||
half -= (lsize & 1) ^ 1;
|
||||
llmatch = lmatch;
|
||||
} else {
|
||||
lrmatch = lmatch;
|
||||
}
|
||||
}
|
||||
|
||||
/* right part */
|
||||
for(rlmatch = match, rrmatch = rmatch, half = rsize >> 1;
|
||||
0 < rsize;
|
||||
rsize = half, half >>= 1) {
|
||||
rmatch = MIN(rlmatch, rrmatch);
|
||||
r = _compare(T, Tsize, P, Psize, SA[k + half], &rmatch);
|
||||
if(r <= 0) {
|
||||
k += half + 1;
|
||||
half -= (rsize & 1) ^ 1;
|
||||
rlmatch = rmatch;
|
||||
} else {
|
||||
rrmatch = rmatch;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
|
||||
return k - j;
|
||||
}
|
||||
|
||||
/* Search for the character c in the string T. */
|
||||
saidx_t
|
||||
sa_simplesearch(const sauchar_t *T, saidx_t Tsize,
|
||||
const saidx_t *SA, saidx_t SAsize,
|
||||
saint_t c, saidx_t *idx) {
|
||||
saidx_t size, lsize, rsize, half;
|
||||
saidx_t i, j, k, p;
|
||||
saint_t r;
|
||||
|
||||
if(idx != NULL) { *idx = -1; }
|
||||
if((T == NULL) || (SA == NULL) || (Tsize < 0) || (SAsize < 0)) { return -1; }
|
||||
if((Tsize == 0) || (SAsize == 0)) { return 0; }
|
||||
|
||||
for(i = j = k = 0, size = SAsize, half = size >> 1;
|
||||
0 < size;
|
||||
size = half, half >>= 1) {
|
||||
p = SA[i + half];
|
||||
r = (p < Tsize) ? T[p] - c : -1;
|
||||
if(r < 0) {
|
||||
i += half + 1;
|
||||
half -= (size & 1) ^ 1;
|
||||
} else if(r == 0) {
|
||||
lsize = half, j = i, rsize = size - half - 1, k = i + half + 1;
|
||||
|
||||
/* left part */
|
||||
for(half = lsize >> 1;
|
||||
0 < lsize;
|
||||
lsize = half, half >>= 1) {
|
||||
p = SA[j + half];
|
||||
r = (p < Tsize) ? T[p] - c : -1;
|
||||
if(r < 0) {
|
||||
j += half + 1;
|
||||
half -= (lsize & 1) ^ 1;
|
||||
}
|
||||
}
|
||||
|
||||
/* right part */
|
||||
for(half = rsize >> 1;
|
||||
0 < rsize;
|
||||
rsize = half, half >>= 1) {
|
||||
p = SA[k + half];
|
||||
r = (p < Tsize) ? T[p] - c : -1;
|
||||
if(r <= 0) {
|
||||
k += half + 1;
|
||||
half -= (rsize & 1) ^ 1;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if(idx != NULL) { *idx = (0 < (k - j)) ? j : i; }
|
||||
return k - j;
|
||||
}
|
||||
#endif
|
Loading…
x
Reference in New Issue
Block a user