antoine-source/appleworksgs/Spell/Src/CLAM.C
2023-03-04 03:45:20 +01:00

1 line
21 KiB
C
Executable File
Raw Permalink Blame History

/***********************************************************************\
Filename: clam.c
\***********************************************************************/
#include <memory.h>
#include <intmath.h>
#include "string.h"
#include "language.h"
#include "spmemory.h"
#include "clam.h"
#include "environ.h"
#include "sp.h"
#include "spdef.h"
#include <prodos.h>
/* Forward declarations for routines that are called in this file before
   their definitions appear: clamfree() is used by clamalloc(), clfetch()
   by clamword(), and clbinret() by clfindbin(). */
unsigned clamfree();
static int clfetch();
static int clbinret();
/* storage structure for one word in a CLAM */
typedef struct
{
short ib_info; /* flags, or "block to descend to" */
char ib_bs; /* backspace of word from last word */
char ib_delta[1]; /* delta of word */
} INFOBSD;
#define ib(x) ((INFOBSD *)(x))
/* Block information structure, at the top of every block.  The INFOBSD
   entries start immediately after it (at offset sizeof(BLKSTATS)). */
typedef struct
{
short st_nextb; /* index of next block on this level; CL_NOBLK when
                   there is none */
short st_used; /* bytes occupied by BLKSTATS and INFOBSDs; CL_DATABIT is
                  OR-ed in when the block is a data block */
} BLKSTATS;
/* View an arbitrary pointer as a pointer to a block header. */
#define bs(x) ((BLKSTATS *)(x))
/* The CLAM decompression buffer: clgetword() rebuilds each word here,
   on top of the previous one. */
char Cldecomp[LONGWORD+2];
/* The ib_info of the word most recently produced by clgetword(); set to
   zero when clgetword() hits the end of the block. */
static int Clinfo;
/* The number of bytes in use in the current block (st_used with
   CL_DATABIT masked off), as set by clblock(). */
static int Clused;
/* The start, inside the block, of the entry clgetword() last looked at. */
static char *Clwsave;
/* The two bin characters plus terminator, as set by clfindbin(). */
static char Clbin[3];
/* The current CLAM: the global Sccurlist viewed as a CLAM pointer. */
#define Curclam ((CLAM *)Sccurlist)
/* Bit in st_used that marks a data block (as opposed to an index block). */
#define CL_DATABIT 0x8000
/* "No next block" indicator for st_nextb and cl_searchb. */
#define CL_NOBLK (-1)
/* Maximum CLAM depth; sizes the block-number stack in clamadd(). */
#define MAXDEPTH 6
/* Read a block from the CLAM. */
static int clblock(block, flags)
int block; /* The block number to read. */
int flags;
{
extern char *memread();
register CLAM *cp = Curclam;
/* Read the block. */
if ((cp->cl_blk0 = memread(block, cp->cl_file, flags +
(block ? MM_PR1 : MM_PR0))) == NULL)
{
Scerror = ERR_CLAM | ERR_IO;
return (FALSE);
}
/* Initialize the decompression variables. */
Clused = bs(cp->cl_blk0)->st_used & (CL_DATABIT - 1);
cp->cl_blkptr = cp->cl_blk0 + sizeof(BLKSTATS);
Scendptr = Cldecomp;
return (TRUE);
}
/* Decompress the next word from a CLAM block. */
static int clgetword()
{
register char *ptr;
register char *dest;
register CLAM *cp = Curclam;
char *ptr0;
ptr = cp->cl_blkptr;
Clwsave = ptr;
/* If at end, set Clinfo to zero so clfindbin() will go to the
next. */
if (ptr - cp->cl_blk0 >= Clused)
{
Clinfo = 0;
return (FALSE);
}
Clinfo = ib(ptr)->ib_info;
dest = Scendptr - ib(ptr)->ib_bs;
ptr0 = ptr = ib(ptr)->ib_delta;
while (*dest++ = *ptr++)
;
Scendptr = dest - 1;
if (((ptr - ptr0) & 1) == 0) {
++ptr;
}
cp->cl_blkptr = ptr;
return (TRUE);
}
/* Advance the CLAM to the next word whose info is non-zero, following
   the st_nextb chain into later blocks when the current one runs out.
   Returns TRUE with the word in Cldecomp and its info in Clinfo.
   Returns FALSE when the current block is block 0, when the chain ends
   (CL_NOBLK), or when the next block cannot be read. */
static int cladvance()
{
    for (;;)
    {
        /* Walk the rest of this block, skipping zero-info words. */
        while (clgetword())
            if (Clinfo != 0)
                return (TRUE);

        /* Block 0 is never chained from. */
        if (Sccurblk == 0)
            return (FALSE);

        /* Link to the next block on this level, if there is one. */
        Sccurblk = bs(Curclam->cl_blk0)->st_nextb;
        if (Sccurblk == CL_NOBLK)
            return (FALSE);

        /* And read it. */
        if (!clblock(Sccurblk, MM_READ))
            return (FALSE);
    }
}
/* Return non-zero if the current block is a data block. */
static int clisdata()
{
return (bs(Curclam->cl_blk0)->st_used & CL_DATABIT);
}
/* Encode a word for CLAM lookup.
   The word must be non-empty and shorter than LONGWORD characters;
   otherwise Scerror is set to ERR_CLAM | ERR_WLEN and FALSE is returned.
   On success the phonetic encoding produced by phfull() is left in
   `peword' and TRUE is returned. */
static int clencode(word, peword)
char *word; /* The word to encode */
char *peword; /* The encoded word */
{
    extern VOID phfull();

    /* Acceptable length: phonetically encode the word. */
    if (word[0] && strlen((char *) word) < LONGWORD)
    {
        phfull(word, peword, FALSE);
        return (TRUE);
    }

    /* Empty or too long. */
    Scerror = ERR_CLAM | ERR_WLEN;
    return (FALSE);
}
/* Form an INFOBSD out of the arguments and write it to ibptr. */
static char *
makeib(ibptr, info, prev, word)
char *ibptr;
int info;
char *prev;
char *word;
{
int bsval;
ib(ibptr)->ib_info = info;
if (!prev)
bsval = 0;
else
{
bsval = strdiff((char *)prev, (char *)word);
word += bsval;
bsval = strlen((char *)prev) - bsval;
}
ib(ibptr)->ib_bs = bsval;
ibptr = ib(ibptr)->ib_delta;
prev = word;
while (*ibptr++ = *word++)
;
if (((word - prev) & 1) == 0) {
*ibptr++ = 0;
}
return (ibptr);
}
/* Open a CLAM file.  `fname' is the name of the file to open.  The flags
   argument can contain CL_INIT to create a new CLAM, CL_READONLY to open
   a CLAM that is not to be modified, and CL_REORG to allow block
   splitting at the end of a block instead of the middle.  `ftype' and
   `atype' are passed to stdcreate() and are only used with CL_INIT.

   Returns a pointer to the structure for the CLAM, or NULL on error.
   The new CLAM also becomes the current list (Sccurlist).

   Every clamalloc() call without CL_READONLY must have a corresponding
   closing call (the original comment names it clamclose(); the routine
   in this file is clamfree()) in order to make all CLAM updates be
   written to disk.
*/
CLAM *clamalloc(fname, flags, ftype, atype)
char *fname;
int flags;
int ftype; /* File type if new clam */
long atype; /* Auxtype if new clam */
{
    CLAM *clam; /* location of new CLAM structure */
    long fsize; /* size of the file in bytes */
    char dummy[3]; /* empty word to insert first */
    char *end; /* end of the first entry of a new CLAM */

    /* Allocate the new structure. */
    clam = (CLAM *) zalloc(_SPID, sizeof(CLAM));
    if (!clam)
    {
        Scerror = ERR_CLAM | ERR_ALLOC;
        return (NULL);
    }
    Sccurlist = (char *) clam;

    if (flags & CL_INIT)
    {
        /* Create the CLAM file. */
        clam->cl_file = stdcreate(fname, ftype, atype);
        if (clam->cl_file == H_ERROR)
            goto fail;

        /* Get a buffer for the first block of the file. */
        if (!clblock(0, MM_WRITE))
            goto fail;

        /* Record the revision and the block count. */
        clam->cl_screv = CL_REV;
        clam->cl_nblks = 1;
        clam->cl_blk0[0] = CL_REV;

        /* Seed the CLAM with a dummy word and mark block 0 as a data
           block. */
        dummy[0] = dummy[1] = MINALPH;
        dummy[2] = 0;
        end = makeib(clam->cl_blk0 + sizeof(BLKSTATS), 0, (char *)0, dummy);
        bs(clam->cl_blk0)->st_used = (end - clam->cl_blk0) | CL_DATABIT;
    }
    else
    {
        /* Open the existing file.  (A GET_PREFIX call that used to be
           made here was commented out by MSL, 7/25/89, and so were its
           locals.)
           NOTE(review): the original source carried debugging markers
           ("ok here" before this call, "NOT OK HERE" after it); what
           they were tracking cannot be told from this file. */
        clam->cl_file = stdopen(fname, (flags & CL_READONLY) ? H_RDONLY : H_RDWR);
        if (clam->cl_file == H_ERROR)
        {
            Scerror = ERR_CLAM | ERR_ACCESS;
            goto fail;
        }

        /* Derive the block count from the size of the file. */
        fsize = stdend(clam->cl_file);
        if (fsize < 0)
        {
            Scerror = ERR_CLAM | ERR_IO;
            goto fail;
        }
        clam->cl_nblks = (int)(fsize / MM_BLKSIZE);

        /* Get the first block of the file. */
        if (!clblock(0, MM_READ))
            goto fail;
    }

    clam->cl_flags = flags & ~CL_LATEST;
    clam->cl_searchb = CL_NOBLK;
    return (clam);

fail:
    /* Close whatever was opened and release the structure. */
    clamfree(clam);
    return (NULL);
}
/* Close the CLAM file and free the memory for the CLAM structure.
   When the file handle is valid (not H_ERROR) the file is flushed with
   memflush() and closed; the structure is freed in every case.
   Returns the result of memflush(), or OKAY when there was no file to
   flush. */
unsigned clamfree(cp)
CLAM *cp;
{
    unsigned status;

    if (cp->cl_file == H_ERROR)
    {
        status = OKAY;
    }
    else
    {
        status = memflush(cp->cl_file);
        stdclose(cp->cl_file);
    }

    nzfree(cp);
    return (status);
}
/* Look for a word in a CLAM; if it is found, return its flags, otherwise
   return IW_NOTFOUND.  IW_NOTFOUND is also returned when the word cannot
   be encoded (clencode() then sets Scerror) or a block cannot be read.

   NOTE(review): `cp' is not referenced here; the search runs against the
   current CLAM (Sccurlist), so callers presumably select the CLAM before
   calling - confirm against the call sites. */
int clamword(word, cp)
char *word; /* The word to look up. */
CLAM *cp; /* The CLAM to look in (see note above). */
{
    char peword[2*MAXWORD]; /* phonetic form of input word */

    if (!clencode(word, peword))
        return (IW_NOTFOUND);

    if (!clfetch(peword, FALSE))
        return (IW_NOTFOUND);

    /* clfetch() left the matching entry's info in Clinfo. */
    return (Clinfo);
}
/* Fetch a word from the current CLAM.  Returns TRUE when the exact word
   is present (its info is then in Clinfo), FALSE otherwise.  If the word
   is not in the CLAM but its data block was reached, the decompression
   pointers are left at the smallest word greater than the argument, or
   at the end of that block.  Sccurblk is set to the data block reached.

   `find' controls what happens when an index block shows that the word
   sorts before its first entry: with find FALSE the search stops at once
   and returns FALSE; with find TRUE the descent goes on with block 0.
   NOTE(review): in that last case, if block 0 is itself an index block,
   the same block is read again - presumably the dummy first word keeps
   this from happening in practice; confirm. */
static int clfetch(peword, find)
char *peword; /* The word to look for */
int find; /* Find the data block. */
{
    int blk = 0;
    int cmp;

    /* Descend through the index blocks to the data block which could
       contain the word. */
    for (;;)
    {
        if (!clblock(blk, MM_READ))
            return (FALSE);

        if (clisdata())
            break;

        /* An index block is a sequence of words, each associated with a
           block number.  The block associated with an index word holds
           the words greater than or equal to that index word and less
           than the next one, so take the block of the last index word
           that is not greater than the query. */
        blk = 0;
        for (;;)
        {
            if (!clgetword())
                break;
            if (strcmp(peword, Cldecomp) < 0)
                break;
            blk = Clinfo;
        }

        /* The word comes before the first word of this index block, so
           it is not in the CLAM. */
        if (blk == 0 && !find)
            return (FALSE);
    }
    Sccurblk = blk;

    /* Decompress the data block until a word greater than or equal to
       the query turns up. */
    while (clgetword())
    {
        cmp = strcmp(peword, Cldecomp);
        if (cmp < 0)
            break;
        if (cmp == 0)
            return (TRUE);
    }

    /* The word is not in the CLAM. */
    return (FALSE);
}
/* Read words from a CLAM.  If init is TRUE, the first word of the CLAM
   is returned.  If init is FALSE, the word after the one returned by the
   previous call is returned; the position is kept in cp->cl_searchb,
   cp->cl_blksav and cp->cl_word.  The words are sorted by their internal
   coding.

   Return values:
     OKAY   - a word was stored in `buffer' and its info in *flagsptr.
     CL_EOF - there are no more words in the CLAM.
     ERROR  - the start position could not be found, there is no saved
              position to resume from, or a block could not be read.

   NOTE(review): the block reads go through the current CLAM (Sccurlist)
   while the saved position lives in `cp'; the two are presumably the
   same object - confirm against the call sites. */
int clamnext(buffer, flagsptr, init, cp)
char *buffer; /* The buffer to return the word in. */
int *flagsptr; /* Where to return the word's info. */
int init; /* Return first word flag. */
CLAM *cp; /* The CLAM to look in. */
{
    int phonchr;
    char *src;
    int prevchr = 0; /* last character emitted; starts at a defined value
                        so that a leading E_REPEAT code cannot read an
                        uninitialized variable */
    char firstword[3];

    if (init)
    {
        /* This is the dummy word at the start of the CLAM. */
        firstword[0] = firstword[1] = MINALPH;
        firstword[2] = 0;

        /* Position to that word. */
        if (!clfetch(firstword, TRUE))
            return (ERROR);
    }
    else
    {
        /* Restore the decompression state from the CLAM structure. */
        if (cp->cl_searchb < 0 || !clblock(cp->cl_searchb, MM_READ))
            return (ERROR);
        cp->cl_blkptr = cp->cl_blk0 + cp->cl_blksav;
        Scendptr = strecpy(Cldecomp, cp->cl_word);
        Sccurblk = cp->cl_searchb;
    }

    /* Find the next word. */
    if (!cladvance())
    {
        cp->cl_searchb = CL_NOBLK;
        return (CL_EOF);
    }

    /* Save the decompression information in the clam structure. */
    cp->cl_searchb = Sccurblk;
    cp->cl_blksav = cp->cl_blkptr - cp->cl_blk0;
    strecpy(cp->cl_word, Cldecomp);

    /* Return the info stored with the word. */
    *flagsptr = Clinfo;

    /* If the word is a special, return just the special part: the text
       that follows the first E_LRIND after the two leading characters.
       NOTE(review): this assumes every special word contains E_LRIND;
       strchr() returning NULL is not handled. */
    if (Clinfo & IW_SPECIAL)
    {
        strecpy(buffer, strchr(Cldecomp + 2, E_LRIND) + 1);
        return (OKAY);
    }

    /* The word is not a special; decode the phonetics, skipping the two
       leading characters. */
    src = Cldecomp + 2;
    while (*src)
    {
        switch (phonchr = Engphtab[*src++][0])
        {
        case E_FILL:
            /* Filler: produces no output. */
            break;
        case E_REPEAT:
            /* Repeat the character emitted last. */
            *buffer++ = prevchr;
            break;
        default:
            prevchr = *buffer++ = phonchr;
            break;
        }
    }
    *buffer = 0;
    return (OKAY);
}
/* Locate the first word of a CLAM bin.  The bin is named by the first
   two characters of `binstr', which are remembered in Clbin for later
   clbinnext() calls.  Returns TRUE with the word in Scdecomp (and its
   flags in Scfdecomp) when the bin has a word, FALSE otherwise.
   The search runs against the current CLAM (Sccurlist). */
int clfindbin(binstr)
char *binstr;
{
    int clbinnext(); /* defined later in this file */

    Clbin[0] = binstr[0];
    Clbin[1] = binstr[1];
    Clbin[2] = 0;

    /* If the bare two-character bin string itself is stored, or the
       position reached carries no info (Clinfo is zero, which is also
       the case at the end of a block), move on to the next word that has
       info.  Otherwise the word already decompressed is the candidate. */
    if (clfetch(Clbin, TRUE) || Clinfo == 0)
        return (clbinnext());

    return (clbinret(Clbin));
}
/* Decompress the next word of the bin selected by the last clfindbin()
   call.  Returns TRUE with the word in Scdecomp (and its flags in
   Scfdecomp) when there is another word in the bin; returns FALSE at the
   bin boundary or when the CLAM has no further words. */
int clbinnext()
{
    /* Advance to the next word that has info. */
    if (!cladvance())
        return (FALSE);

    /* Check that it is still in the same bin and return it. */
    return (clbinret(Clbin));
}
/* Hand the word currently in Cldecomp back to the caller if it belongs
   to the given bin.  Returns FALSE when the first two characters of
   Cldecomp differ from `bin'.  Otherwise stores Clinfo in Scfdecomp[0]
   (followed by a zero), clears Sctag, copies the word into Scdecomp and
   returns TRUE. */
static int clbinret(bin)
char *bin; /* The bin the word must be in. */
{
    char *from;
    char *to;

    /* Return if the word is not in the desired bin. */
    if (!(bin[0] == Cldecomp[0] && bin[1] == Cldecomp[1]))
        return (FALSE);

    /* Store the flags. */
    Scfdecomp[0] = Clinfo;
    Scfdecomp[1] = 0;
    Sctag = 0;

    /* Copy the word into Scdecomp one character at a time; from the
       first E_LRIND on, the rest (E_LRIND included) is copied by
       strecpy() in one go. */
    to = Scdecomp;
    for (from = Cldecomp; *from; ++from)
    {
        if (*from == E_LRIND)
        {
            strecpy(to, from);
            return (TRUE);
        }
        *to++ = *from;
    }
    *to = 0;
    return (TRUE);
}
/* Add a word to the CLAM. */
clamadd(word, flags, cp)
char *word; /* The word to add. */
int flags; /* The word's flags. */
CLAM *cp; /* The CLAM to add to. */
{
char *bptr;
int len;
char *dptr;
int tmp;
char *wptr;
int level;
int isdata;
int block[MAXDEPTH]; /* block numbers */
char peword[2*MAXWORD]; /* phonetically encoded word */
char *buffer;
buffer = zalloc(_SPID, MM_BLKSIZE + sizeof(INFOBSD) + LONGWORD + 3);
Sccurlist = (char *) cp;
/* Encode the word. */
if (!clencode(word, peword))
{
nzfree(buffer);
return (ERROR);
}
/* Find the data block which the word is to be put into. When this
loop is done, level will be the depth of the tree,
block[0:level-1] will contain the block numbers traversed. */
level = block[0] = 0;
while (1)
{
if (!clblock(block[level], MM_READ))
{
nzfree(buffer);
return (ERROR);
}
if (clisdata())
break;
len = 0;
while (clgetword() && strcmp(peword, Cldecomp) >= 0)
len = Clinfo;
block[++level] = len;
}
/* Insert the data; split blocks as necessary. If the root block is
split, the program breaks out from the loop, otherwise it returns
when a block does not need splitting. At the top of the loop
block[level] is the block to insert into, peword is the word to
insert, and flags is the info to be associated with it. */
while (level >= 0)
{
/* Read an index or data block. */
if (!clblock(block[level], MM_READ|MM_WRITE))
{
nzfree(buffer);
return (ERROR);
}
isdata = clisdata();
/* Decompress the block until a word is found greater than
or equal to the word to be inserted. If the word is
equal to the word to be inserted then this will be a data
block; the word's flags will be updated. When this code
exits, dptr is the insertion point, Scdecomp contains the
word before the insertion. If the insertion is at the
end of the block, tmp is -1; otherwise, Cldecomp contains
the word at the insertion and tmp is its info.
Clamptr->cl_blkptr points to the data after the word at
the insertion. */
Cldecomp[0] = 0;
while (1)
{
strecpy(Scdecomp, Cldecomp);
if (!clgetword())
{
len = -1;
break;
}
if ((len = strcmp(peword, Cldecomp)) == 0)
{
ib(Clwsave)->ib_info = flags;
nzfree(buffer);
return (OKAY);
}
if (len < 0)
{
len = Clinfo;
break;
}
}
dptr = Clwsave;
tmp = len;
/* Construct in `buffer' the entry for the word that is
being inserted. bptr will point to the end of the
entry. */
bptr = makeib(buffer, flags, Scdecomp, peword);
/* If the insertion was not at the end of the block, add the
word at the insertion point and then copy the reaminder
of the block into the buffer. */
if (tmp >= 0)
{
/* Construct the word after the inserted word. */
bptr = makeib(bptr, tmp, peword, Cldecomp);
/* Copy the remainder of the block. */
len = Clused - (cp->cl_blkptr - cp->cl_blk0);
scmove(bptr, cp->cl_blkptr, len);
bptr += len;
}
/* Now, the new block exists in two parts: from blk0 to
dptr, and from buffer to bptr. If the total is smaller
than one block, construct the new block from the data and
return. */
len = (dptr - cp->cl_blk0) + (bptr - buffer);
bs(cp->cl_blk0)->st_used = len | isdata;
if (len <= MM_BLKSIZE)
{
scmove(dptr, buffer, bptr - buffer);
nzfree(buffer);
return (OKAY);
}
/* The block has to be split: first, create the whole block
in the buffer. Bptr is the end of data in the buffer. */
len = dptr - cp->cl_blk0;
scmove(buffer + len, buffer, bptr - buffer);
scmove(buffer, cp->cl_blk0, len);
bptr += len;
/* Now, find the place to split. If the reorg flag is on
and the word was added to the end of the block then
split near the end; otherwise, split near the middle. */
len = MM_BLKSIZE;
if (tmp >= 0 || !(cp->cl_flags & CL_REORG))
len = MM_BLKSIZE / 2;
dptr = buffer + len;
/* Find the last word in the buffer whose start is before
the split position. Wptr will point to the word that is
to go in the second block. Scdecomp will contain the
first word of the first block. This will be used if this
is the root block. Cldecomp and Clinfo define the first
word for the second block. Clamptr->cl_blkptr points to
the rest of the words to be put in the second block. */
wptr = cp->cl_blk0;
Clused = MM_BLKSIZE + 1;
cp->cl_blk0 = buffer;
cp->cl_blkptr = buffer + sizeof(BLKSTATS);
Scendptr = Cldecomp;
clgetword();
strecpy(Scdecomp, Cldecomp);
do
clgetword();
while (cp->cl_blkptr <= dptr);
cp->cl_blk0 = wptr;
wptr = cp->cl_blkptr;
/* Now, make the first block. Tmp returns the next block,
this is stored in the second block. Wptr is the
info after the word being inserted before. */
len = Clwsave - buffer;
dptr = cp->cl_blk0;
scmove(dptr + sizeof(BLKSTATS), buffer + sizeof(BLKSTATS),
len - sizeof(BLKSTATS));
bs(dptr)->st_used = len | isdata;
tmp = level ? bs(dptr)->st_nextb : CL_NOBLK;
bs(dptr)->st_nextb = cp->cl_nblks;
/* Get a block to put the second part into. */
if (!clblock(cp->cl_nblks, MM_WRITE))
{
nzfree(buffer);
return (ERROR);
}
/* Put the first word into the buffer. */
dptr = makeib(cp->cl_blk0 + sizeof(BLKSTATS), Clinfo, (char *)0,
Cldecomp);
/* Copy the remaining words and set up the block header. */
len = bptr - wptr;
scmove(dptr, wptr, len);
bptr = cp->cl_blk0;
bs(bptr)->st_nextb = tmp;
bs(bptr)->st_used = (dptr + len - bptr) | isdata;
/* Set up info for the next iteration of the loop. */
strecpy(peword, Cldecomp);
flags = cp->cl_nblks++;
--level;
}
/* Put the old root block at the end of the CLAM. */
if (!memswitch(cp->cl_file, 0, cp->cl_nblks++))
{
nzfree(buffer);
return (ERROR);
}
/* Get the root block. */
if (!clblock(0, MM_WRITE))
{
nzfree(buffer);
return (ERROR);
}
/* Construct the words of the block. */
dptr = makeib(cp->cl_blkptr, cp->cl_nblks-1, (char *) 0, Scdecomp);
dptr = makeib(dptr, cp->cl_nblks - 2, Scdecomp, Cldecomp);
/* Set up the block header. */
bptr = cp->cl_blk0;
bptr[0] = CL_REV;
bs(bptr)->st_used = dptr - bptr;
/* All done. */
nzfree(buffer);
return (OKAY);
}