antoine-source/appleworksgs/Spell/Src/CLAM.C

/***********************************************************************\

    Filename: clam.c

\***********************************************************************/

#include <memory.h>
#include <intmath.h>
#include "string.h"
#include "language.h"
#include "spmemory.h"
#include "clam.h"
#include "environ.h"
#include "sp.h"
#include "spdef.h"
#include <prodos.h>

/* Forward declarations for routines that are called before they are
   defined later in this file. */

unsigned clamfree();
static int clfetch();
static int clbinret();

/* Storage structure for one word in a CLAM block.

   Words are stored front-compressed against the word before them:
   clgetword() rebuilds a word by backing up ib_bs characters from the end
   of the previously decompressed word and appending ib_delta.  ib_delta is
   a NUL-terminated string; the [1] only marks where it starts.  makeib()
   appends one extra zero byte whenever the delta, counting its NUL, has an
   even length, and clgetword() skips that byte again. */

typedef struct
{
    short  ib_info;        /* flags, or "block to descend to" */
    char   ib_bs;          /* backspace of word from last word */
    char   ib_delta[1];    /* delta of word */
} INFOBSD;

/* View the bytes at x as an INFOBSD entry. */
#define ib(x)           ((INFOBSD *)(x))

/* Block information structure, at the top of every block.

   st_used carries two things: the CL_DATABIT flag, which marks a data
   block (see clisdata()), and in the remaining bits the number of bytes in
   use, which clblock() extracts with (CL_DATABIT - 1).  For block 0 the
   first byte of the block is overwritten with CL_REV by clamalloc() and
   clamadd(), and cladvance() never follows st_nextb from block 0. */

typedef struct
{
    short   st_nextb;       /* index of next block on this level */
    short   st_used;        /* bytes occupied by BLKSTATS and INFOBSDs */
} BLKSTATS;

/* View the bytes at x as a BLKSTATS header. */
#define bs(x)           ((BLKSTATS *)(x))

/* Decompression state shared by the routines in this file.  It always
   refers to the block most recently loaded by clblock() for the current
   CLAM (Curclam). */

char    Cldecomp[LONGWORD+2];     /* The CLAM decompression buffer: the word last rebuilt by clgetword(). */
static  int Clinfo;              /* The info (ib_info) of the word last read; 0 at end of block. */
static  int Clused;              /* Bytes in use in the current block, data-block bit removed. */
static  char *Clwsave;          /* The start of the current word's entry in the block. */
static  char Clbin[3];          /* The two bin characters plus NUL, set by clfindbin(). */

#define Curclam    ((CLAM *)Sccurlist) /* The current CLAM. */
#define CL_DATABIT 0x8000       /* bit in st_used that marks a data block. */
#define CL_NOBLK   (-1)         /* "no next block" indicator */
#define MAXDEPTH   6            /* maximum CLAM depth (size of clamadd()'s path array) */

/* Read a block from the CLAM. */

static int clblock(block, flags)
int     block;                  /* The block number to read. */
int     flags;
{
extern char *memread();
register CLAM *cp = Curclam;

    /* Read the block. */

    if ((cp->cl_blk0 = memread(block, cp->cl_file, flags +
      (block ? MM_PR1 : MM_PR0))) == NULL)
    {
        Scerror = ERR_CLAM | ERR_IO;
        return (FALSE);
    }
    /* Initialize the decompression variables. */

    Clused = bs(cp->cl_blk0)->st_used & (CL_DATABIT - 1);
    cp->cl_blkptr = cp->cl_blk0 + sizeof(BLKSTATS);
    Scendptr = Cldecomp;
    return (TRUE);
}

/* Decompress the next word from a CLAM block.  */

static int clgetword()
{
register char *ptr;
register char *dest;
register CLAM *cp = Curclam;
char   *ptr0;

    ptr = cp->cl_blkptr;
    Clwsave = ptr;

    /* If at end, set Clinfo to zero so clfindbin() will go to the
       next.  */

    if (ptr - cp->cl_blk0 >= Clused)
    {
        Clinfo = 0;
        return (FALSE);
    }
    Clinfo = ib(ptr)->ib_info;
    dest = Scendptr - ib(ptr)->ib_bs;
    ptr0 = ptr = ib(ptr)->ib_delta;
    while (*dest++ = *ptr++)
        ;
    Scendptr = dest - 1;
    if (((ptr - ptr0) & 1) == 0) {
        ++ptr;
    }
    cp->cl_blkptr = ptr;
    return (TRUE);
}

/* Advance to the next word whose info is non-zero, following the
   st_nextb chain into later blocks when the current block runs out.

   Returns TRUE with the word in Cldecomp and its info in Clinfo.  Returns
   FALSE when the current block is block 0 (which is never followed), when
   the chain ends with CL_NOBLK, or when the next block cannot be read.  */

static int cladvance()
{
    for (;;)
    {
        /* Walk the rest of this block, skipping words with zero info. */

        while (clgetword())
            if (Clinfo)
                return (TRUE);

        /* Block exhausted: find the block that follows it. */

        if (Sccurblk == 0)
            return (FALSE);
        Sccurblk = bs(Curclam->cl_blk0)->st_nextb;
        if (Sccurblk == CL_NOBLK)
            return (FALSE);

        /* Load it and keep going. */

        if (!clblock(Sccurblk, MM_READ))
            return (FALSE);
    }
}

/* Return non-zero if the current block is a data block. */

static int clisdata()
{
    return (bs(Curclam->cl_blk0)->st_used & CL_DATABIT);
}

/* Encode a word for CLAM lookup.

   word   - the word to encode
   peword - receives the phonetic encoding produced by phfull()

   Returns TRUE once the word has been encoded.  An empty word, or one of
   LONGWORD characters or more, is rejected: Scerror is set to
   ERR_CLAM | ERR_WLEN and FALSE is returned.  */

static int clencode(word, peword)
char   *word;                  /* The word to encode */
char   *peword;                /* The encoded word */
{
extern VOID phfull();

    /* Only a non-empty word shorter than LONGWORD is acceptable. */

    if (word[0] && strlen((char *) word) < LONGWORD)
    {
        /* Phonetically encode the word. */

        phfull(word, peword, FALSE);
        return (TRUE);
    }

    Scerror = ERR_CLAM | ERR_WLEN;
    return (FALSE);
}

/* Build one INFOBSD entry at ibptr.

   ibptr - where the entry is written
   info  - value stored in ib_info
   prev  - the word that precedes this one in the block, or a null
           pointer to store the word in full
   word  - the word to store

   With a previous word, only the part of `word' from the position
   reported by strdiff(prev, word) onwards is stored (presumably the first
   differing character -- confirm against strdiff()), and ib_bs is set to
   the number of characters of `prev' beyond that position.  A zero pad
   byte follows the delta when its length, NUL included, is even.

   Returns a pointer just past the entry.  */

static char *
makeib(ibptr, info, prev, word)
char *ibptr;
int  info;
char *prev;
char *word;
{
int  common;
int  back;
char *tail;
char *src;
char *out;

    ib(ibptr)->ib_info = info;

    /* Work out how much of the previous word can be reused. */

    back = 0;
    tail = word;
    if (prev)
    {
        common = strdiff((char *)prev, (char *)word);
        tail = word + common;
        back = strlen((char *)prev) - common;
    }
    ib(ibptr)->ib_bs = back;

    /* Store the remainder of the word, terminating NUL included. */

    out = ib(ibptr)->ib_delta;
    src = tail;
    while ((*out++ = *src++) != 0)
        ;

    /* Pad so that clgetword() finds the next entry where it expects. */

    if (((src - tail) & 1) == 0)
        *out++ = 0;

    return (out);
}

/* Open a CLAM file.  `fname' is the name of the file to open.  The flags
   argument can contain CL_INIT to create a new CLAM, CL_READONLY to open a
   CLAM that is not to be modified, and CL_REORG to allow block splitting at
   the end of a block instead of the middle.  `ftype' and `atype' are only
   used with CL_INIT, where they are handed to stdcreate().

   The new CLAM becomes the current list (Sccurlist) as soon as its
   structure has been allocated.

   Returns a pointer to the structure for the CLAM, or NULL on error.
   Scerror is set here for allocation failure (ERR_ALLOC), open failure
   (ERR_ACCESS) and a failed size query (ERR_IO); a failed block read is
   reported by clblock(); a failed stdcreate() sets nothing here.

   Every clamalloc() call without CL_READONLY must have a corresponding
   clamclose() call in order to make all CLAM updates be written to disk.
   */

CLAM *clamalloc(fname, flags, ftype, atype)
char    *fname;
int flags;
int ftype;      /* File type if new clam */
long atype;      /* Auxtype if new clam */
{
CLAM    *clam;                  /* the new CLAM structure */
long    fsize;                  /* size of an existing file in bytes */
char    *endp;                  /* end of the first entry of a new file */
char    nullword[3];            /* dummy first word of a new file */

    /* Allocate the new structure and make it current. */

    clam = (CLAM *) zalloc(_SPID, sizeof(CLAM));
    if (clam == NULL)
    {
        Scerror = ERR_CLAM | ERR_ALLOC;
        return (NULL);
    }
    Sccurlist = (char *) clam;

    if (!(flags & CL_INIT))
    {
        /* Existing CLAM: open it.  (A GET_PREFIX call that once preceded
           the open was dropped as unnecessary -- MSL 7/25/89.) */

        clam->cl_file = stdopen(fname, (flags & CL_READONLY) ? H_RDONLY : H_RDWR);
        if (clam->cl_file == H_ERROR)
        {
            Scerror = ERR_CLAM | ERR_ACCESS;
            goto fail;
        }

        /* The block count follows from the size of the file. */

        fsize = stdend(clam->cl_file);
        if (fsize < 0)
        {
            Scerror = ERR_CLAM | ERR_IO;
            goto fail;
        }
        clam->cl_nblks = (int)(fsize / MM_BLKSIZE);

        /* Get the first block of the file. */

        if (!clblock(0, MM_READ))
            goto fail;
    }
    else
    {
        /* New CLAM: create the file and get a buffer for its first
           block. */

        clam->cl_file = stdcreate(fname, ftype, atype);
        if (clam->cl_file == H_ERROR)
            goto fail;
        if (!clblock(0, MM_WRITE))
            goto fail;

        /* One block so far; stamp the revision into the structure and
           into the first byte of the block. */

        clam->cl_screv = CL_REV;
        clam->cl_nblks = 1;
        clam->cl_blk0[0] = CL_REV;

        /* Seed the block with a dummy word (two MINALPH characters, info
           zero) and mark it as a data block. */

        nullword[0] = MINALPH;
        nullword[1] = MINALPH;
        nullword[2] = 0;
        endp = makeib(clam->cl_blk0 + sizeof(BLKSTATS), 0, (char *)0,
          nullword);
        bs(clam->cl_blk0)->st_used = (endp - clam->cl_blk0) | CL_DATABIT;
    }

    clam->cl_flags = flags & ~CL_LATEST;
    clam->cl_searchb = CL_NOBLK;
    return (clam);

    /* Common failure exit: release the file (if any) and the structure. */

fail:
    clamfree(clam);
    return (NULL);
}

/* Close the CLAM file and free the memory for the CLAM structure.

   If the CLAM has a file (cl_file is not H_ERROR), it is flushed with
   memflush() and closed, and the memflush() result is returned.
   Otherwise OKAY is returned.  The structure is freed in both cases and
   must not be used afterwards.  */

unsigned clamfree(cp)
CLAM *cp;
{
unsigned status;

    /* No file was ever attached: only the structure needs freeing. */

    if (cp->cl_file == H_ERROR)
    {
        nzfree(cp);
        return (OKAY);
    }

    /* Write out pending blocks before closing. */

    status = memflush(cp->cl_file);
    stdclose(cp->cl_file);
    nzfree(cp);
    return (status);
}

/* Look for a word in a CLAM; if it is found, return its flags, otherwise
   return IW_NOTFOUND.

   word - the word to look up
   cp   - the CLAM to search

   IW_NOTFOUND is also returned when the word cannot be encoded (see
   clencode(), which sets Scerror in that case).

   NOTE(review): the lookup runs against the current list (Sccurlist, via
   clfetch()/clblock()); `cp' itself is not consulted here.  Callers
   presumably make `cp' current before calling -- confirm.  */

int clamword(word, cp)
char   *word;
CLAM    *cp;
{
char peword[2*MAXWORD]; /* phonetic form of input word */

    if (!clencode(word, peword))
        return (IW_NOTFOUND);

    /* An exact match only: do not force a descent to a data block. */

    if (!clfetch(peword, FALSE))
        return (IW_NOTFOUND);

    /* clfetch() left the matching word's info in Clinfo. */

    return (Clinfo);
}

/* Fetch a word from the current CLAM.

   peword - the (encoded) word to look for
   find   - when TRUE, always descend to a data block; when FALSE, give up
            as soon as an index block shows that the word sorts before its
            first entry

   Returns TRUE when the word is found; Cldecomp and Clinfo then describe
   it.  Returns FALSE otherwise.  If a data block was reached, the
   decompression state is left at the first word greater than the
   argument, or at the end of the block (Clinfo is then zero), and
   Sccurblk holds the number of that data block.  */

static int clfetch(peword, find)
char    *peword;                /* The word to look for */
int     find;                   /* Find the data block. */
{
int blkno;
int cmp;

    /* Descend through the index blocks, starting at the root, until a
       data block is loaded. */

    blkno = 0;
    for (;;)
    {
        if (!clblock(blkno, MM_READ))
            return (FALSE);
        if (clisdata())
            break;

        /* Each index word carries the number of the block holding the
           words from that index word up to, but not including, the next
           index word.  Keep the block of the last index word that does
           not sort after the query. */

        blkno = 0;
        while (clgetword())
        {
            if (strcmp(peword, Cldecomp) < 0)
                break;
            blkno = Clinfo;
        }

        /* The query sorts before every index word: it cannot be in the
           CLAM. */

        if (blkno == 0 && !find)
            return (FALSE);
    }
    Sccurblk = blkno;

    /* Scan the data block up to the first word that is not smaller than
       the query. */

    while (clgetword())
    {
        cmp = strcmp(peword, Cldecomp);
        if (cmp == 0)
            return (TRUE);
        if (cmp < 0)
            break;
    }

    /* The word is not in the CLAM. */

    return (FALSE);
}

/* Read words from a CLAM.  If init is TRUE, the first word of the CLAM is
   returned.  If init is FALSE, the next word is returned.  The words are
   sorted by their internal coding.

   buffer   - receives the word as text
   flagsptr - receives the word's info
   init     - TRUE to start at the beginning, FALSE to continue
   cp       - the CLAM; it carries the scan position between calls
              (cl_searchb, cl_blksav, cl_word)

   The return values are: OKAY, CL_EOF, and ERROR.  CL_EOF is returned
   when there are no more words in the CLAM.  ERROR is returned when the
   starting position cannot be established, when a continuation is
   requested without a saved position, or when a word flagged IW_SPECIAL
   holds no E_LRIND marker (the buffer is then left empty).

   NOTE(review): block access goes through the current list (Sccurlist);
   callers presumably make `cp' current first -- confirm.  */

int clamnext(buffer, flagsptr, init, cp)
char    *buffer;            /* The buffer to return the word in. */
int     *flagsptr;          /* Where to return the word's info. */
int     init;               /* Return first word flag. */
CLAM    *cp;                /* The CLAM to look in. */
{
int phonchr;
char *src;
char *special;
int  prevchr = 0;           /* last character emitted; nothing yet */
char firstword[3];

    if (init)
    {
        /* This is the dummy word at the start of the CLAM. */

        firstword[0] = firstword[1] = MINALPH;
        firstword[2] = 0;

        /* Position to that word. */

        if (!clfetch(firstword, TRUE))
            return (ERROR);
    }
    else
    {
        /* Restore the decompression state from the CLAM structure. */

        if (cp->cl_searchb < 0 || !clblock(cp->cl_searchb, MM_READ))
            return (ERROR);
        cp->cl_blkptr = cp->cl_blk0 + cp->cl_blksav;
        Scendptr = strecpy(Cldecomp, cp->cl_word);
        Sccurblk = cp->cl_searchb;
    }

    /* Find the next word. */

    if (!cladvance())
    {
        cp->cl_searchb = CL_NOBLK;
        return (CL_EOF);
    }

    /* Save the decompression information in the clam structure. */

    cp->cl_searchb = Sccurblk;
    cp->cl_blksav = cp->cl_blkptr - cp->cl_blk0;
    strecpy(cp->cl_word, Cldecomp);

    /* Return the info stored with the word. */

    *flagsptr = Clinfo;

    /* If the word is a special, return just the special part: the text
       after the E_LRIND marker.  A special without a marker is malformed;
       report it rather than copying from an invalid address. */

    if (Clinfo & IW_SPECIAL)
    {
        special = strchr(Cldecomp + 2, E_LRIND);
        if (special == NULL)
        {
            *buffer = 0;
            return (ERROR);
        }
        strecpy(buffer, special + 1);
        return (OKAY);
    }

    /* The word is not a special; decode the phonetics.  The first two
       characters are the bin and are not part of the text. */

    src = Cldecomp + 2;
    while (*src)
    {
        switch (phonchr = Engphtab[*src++][0])
        {
            case E_FILL:
                /* filler: produces no output */
                break;
            case E_REPEAT:
                /* repeat the character emitted last */
                *buffer++ = prevchr;
                break;
            default:
                prevchr = *buffer++ = phonchr;
        }
    }
    *buffer = 0;
    return (OKAY);
}

/* Locate the first word of a CLAM bin.  `binstr' supplies the two bin
   characters, which are remembered in Clbin for clbinnext().

   Returns TRUE with the word in Scdecomp (see clbinret()) when the bin
   has a word, FALSE otherwise.  */

clfindbin(binstr)
char *binstr;
{
    Clbin[0] = binstr[0];
    Clbin[1] = binstr[1];
    Clbin[2] = 0;

    /* If the search stopped on a word that is not the bare bin string and
       that has non-zero info, that word is the candidate. */

    if (!clfetch(Clbin, TRUE) && Clinfo != 0)
        return (clbinret(Clbin));

    /* Otherwise (exact hit on the bin string, or zero info) move on to
       the next word with non-zero info. */

    return (clbinnext());
}

/* Step to the next word with non-zero info and return it if it still
   belongs to the bin chosen by the last clfindbin() call.

   Returns TRUE with the word in Scdecomp (see clbinret()), or FALSE when
   the CLAM is exhausted or the next word lies in a different bin.  */

clbinnext()
{
    /* cladvance() fails at the end of the CLAM; otherwise let clbinret()
       check the bin and hand the word back. */

    return (cladvance() ? clbinret(Clbin) : FALSE);
}

/* Hand back the word currently in Cldecomp if it belongs to bin `bin'.

   Returns FALSE when the first two characters of the word differ from
   the bin.  Otherwise the word's info is stored in Scfdecomp[0] (with
   Scfdecomp[1] and Sctag cleared), the word is copied into Scdecomp, and
   TRUE is returned.  Characters are copied one at a time up to an E_LRIND
   marker; the marker and everything after it are then copied with
   strecpy().  */

static int clbinret(bin)
char *bin;                   /* The bin the word must be in. */
{
char *from;
char *to;

    /* Reject a word from another bin. */

    if (!(bin[0] == Cldecomp[0] && bin[1] == Cldecomp[1]))
        return (FALSE);

    /* Store the flags. */

    Scfdecomp[0] = Clinfo;
    Scfdecomp[1] = 0;
    Sctag = 0;

    /* Copy the word into Scdecomp. */

    to = Scdecomp;
    for (from = Cldecomp; *from; ++from)
    {
        if (*from == E_LRIND)
        {
            /* The remainder goes across in one piece. */

            strecpy(to, from);
            return (TRUE);
        }
        *to++ = *from;
    }
    *to = 0;
    return (TRUE);
}

/* Add a word to the CLAM. */

clamadd(word, flags, cp)
char    *word;                  /* The word to add. */
int     flags;                  /* The word's flags. */
CLAM    *cp;                    /* The CLAM to add to. */
{
char    *bptr;
int     len;
char    *dptr;
int     tmp;
char    *wptr;
int     level;
int     isdata;
int     block[MAXDEPTH];        /* block numbers */
char    peword[2*MAXWORD];      /* phonetically encoded word */
char    *buffer;

    buffer = zalloc(_SPID, MM_BLKSIZE + sizeof(INFOBSD) + LONGWORD + 3);

    Sccurlist = (char *) cp;

    /* Encode the word. */

    if (!clencode(word, peword))
    {
        nzfree(buffer);
        return (ERROR);
    }

        /* Find the data block which the word is to be put into.  When this
           loop is done, level will be the depth of the tree,
           block[0:level-1] will contain the block numbers traversed.  */

    level = block[0] = 0;
    while (1)
    {
        if (!clblock(block[level], MM_READ))
        {
            nzfree(buffer);
            return (ERROR);
        }
        if (clisdata())
            break;
        len = 0;
        while (clgetword() && strcmp(peword, Cldecomp) >= 0)
            len = Clinfo;
        block[++level] = len;
    }

    /* Insert the data; split blocks as necessary.  If the root block is
       split, the program breaks out from the loop, otherwise it returns
       when a block does not need splitting.  At the top of the loop
       block[level] is the block to insert into, peword is the word to
       insert, and flags is the info to be associated with it.  */

    while (level >= 0)
    {
        /* Read an index or data block. */

        if (!clblock(block[level], MM_READ|MM_WRITE))
        {
            nzfree(buffer);
            return (ERROR);
        }
        isdata = clisdata();

        /* Decompress the block until a word is found greater than
           or equal to the word to be inserted.  If the word is
           equal to the word to be inserted then this will be a data
           block; the word's flags will be updated.  When this code
           exits, dptr is the insertion point, Scdecomp contains the
           word before the insertion.  If the insertion is at the
           end of the block, tmp is -1; otherwise, Cldecomp contains
           the word at the insertion and tmp is its info.
           Clamptr->cl_blkptr points to the data after the word at
           the insertion.  */

        Cldecomp[0] = 0;
        while (1)
        {
            strecpy(Scdecomp, Cldecomp);
            if (!clgetword())
            {
                len = -1;
                break;
            }
            if ((len = strcmp(peword, Cldecomp)) == 0)
            {
                ib(Clwsave)->ib_info = flags;
                nzfree(buffer);
                return (OKAY);
            }
            if (len < 0)
            {
                len = Clinfo;
                break;
            }
        }
        dptr = Clwsave;
        tmp = len;

        /* Construct in `buffer' the entry for the word that is
           being inserted.  bptr will point to the end of the
           entry.  */

        bptr = makeib(buffer, flags, Scdecomp, peword);

        /* If the insertion was not at the end of the block, add the
           word at the insertion point and then copy the reaminder
           of the block into the buffer.  */

        if (tmp >= 0)
        {
            /* Construct the word after the inserted word. */

            bptr = makeib(bptr, tmp, peword, Cldecomp);

            /* Copy the remainder of the block. */

            len = Clused - (cp->cl_blkptr - cp->cl_blk0);
            scmove(bptr, cp->cl_blkptr, len);
            bptr += len;
        }

        /* Now, the new block exists in two parts: from blk0 to
           dptr, and from buffer to bptr.  If the total is smaller
           than one block, construct the new block from the data and
           return.  */

        len = (dptr - cp->cl_blk0) + (bptr - buffer);
        bs(cp->cl_blk0)->st_used = len | isdata;
        if (len <= MM_BLKSIZE)
        {
            scmove(dptr, buffer, bptr - buffer);
            nzfree(buffer);
            return (OKAY);
        }

        /* The block has to be split: first, create the whole block
           in the buffer.  Bptr is the end of data in the buffer. */

        len = dptr - cp->cl_blk0;
        scmove(buffer + len, buffer, bptr - buffer);
        scmove(buffer, cp->cl_blk0, len);
        bptr += len;

        /* Now, find the place to split. If the reorg flag is on
           and the word was added to the end of the block then
           split near the end; otherwise, split near the middle. */

        len = MM_BLKSIZE;
        if (tmp >= 0 || !(cp->cl_flags & CL_REORG))
            len = MM_BLKSIZE / 2;
        dptr = buffer + len;

        /* Find the last word in the buffer whose start is before
           the split position.  Wptr will point to the word that is
           to go in the second block.  Scdecomp will contain the
           first word of the first block.  This will be used if this
           is the root block.  Cldecomp and Clinfo define the first
           word for the second block.  Clamptr->cl_blkptr points to
           the rest of the words to be put in the second block.  */

        wptr = cp->cl_blk0;
        Clused = MM_BLKSIZE + 1;
        cp->cl_blk0 = buffer;
        cp->cl_blkptr = buffer + sizeof(BLKSTATS);
        Scendptr = Cldecomp;
        clgetword();
        strecpy(Scdecomp, Cldecomp);
        do
            clgetword();
        while (cp->cl_blkptr <= dptr);
        cp->cl_blk0 = wptr;
        wptr = cp->cl_blkptr;

        /* Now, make the first block.  Tmp returns the next block,
           this is stored in the second block.  Wptr is the
           info after the word being inserted before. */

        len = Clwsave - buffer;
        dptr = cp->cl_blk0;
        scmove(dptr + sizeof(BLKSTATS), buffer + sizeof(BLKSTATS),
          len - sizeof(BLKSTATS));
        bs(dptr)->st_used = len | isdata;
        tmp = level ? bs(dptr)->st_nextb : CL_NOBLK;
        bs(dptr)->st_nextb = cp->cl_nblks;

        /* Get a block to put the second part into. */

        if (!clblock(cp->cl_nblks, MM_WRITE))
        {
            nzfree(buffer);
            return (ERROR);
        }

        /* Put the first word into the buffer. */

        dptr = makeib(cp->cl_blk0 + sizeof(BLKSTATS), Clinfo, (char *)0,
          Cldecomp);

        /* Copy the remaining words and set up the block header. */

        len = bptr - wptr;
        scmove(dptr, wptr, len);
        bptr = cp->cl_blk0;
        bs(bptr)->st_nextb = tmp;
        bs(bptr)->st_used = (dptr + len - bptr) | isdata;

        /* Set up info for the next iteration of the loop. */

        strecpy(peword, Cldecomp);
        flags = cp->cl_nblks++;
        --level;
    }

    /* Put the old root block at the end of the CLAM. */

    if (!memswitch(cp->cl_file, 0, cp->cl_nblks++))
    {
        nzfree(buffer);
        return (ERROR);
    }

    /* Get the root block. */

    if (!clblock(0, MM_WRITE))
    {
        nzfree(buffer);
        return (ERROR);
    }

    /* Construct the words of the block. */

    dptr = makeib(cp->cl_blkptr, cp->cl_nblks-1, (char *) 0, Scdecomp);
    dptr = makeib(dptr, cp->cl_nblks - 2, Scdecomp, Cldecomp);

    /* Set up the block header. */

    bptr = cp->cl_blk0;
    bptr[0] = CL_REV;
    bs(bptr)->st_used = dptr - bptr;

    /* All done. */

    nzfree(buffer);
    return (OKAY);
}