antoine-source/appleworksgs/Spell/Src/LEX.C

/***********************************************************************\

   Filename: lex.c

\***********************************************************************/

#include <memory.h>
#include "lex.h"
#include "spmemory.h"
#include "environ.h"
#include "string.h"
#include "stdio.h"
#include "sp.h"
#include "spdef.h"

/* What used to be the lex structure - now there's only one, so I just use
   vars. */

/* Defined elsewhere; lexfree() leaves the lexicon file open while
   _THActive[0] is nonzero. */
extern int _THActive[];

/* Lexicon header fields.  lexalloc() fills these with a single read of
   2 * LEXHEADER bytes starting at the address of Lexenvcode, so the order
   of these declarations must not be changed.
   NOTE(review): this relies on the compiler laying these variables out
   contiguously in declaration order, and presumably LEXHEADER is the
   number of shorts below - confirm against lex.h. */

short Lexenvcode;     /* lexicon's environment code */
short Lexscrev;       /* revision of sc compatible with lexicon */
short Lexprod;        /* product code (shy, uh, etc.) */
short Lexsegwords;    /* number of words in a segment */
short Lexnbsd;        /* number of entries in BSD table */
short Lexngram;       /* number of entries in di-gram table */
short Lexnindex;      /* number of entries in the index table */
short Lexnstrings;    /* size of the string area in bytes */

   /* locations of tables used by decompression routines */

   /* After a successful lexalloc(), Lexbs and Lexdelta are biased down by
      Lexminbsd and Lexgramtab by Lexmingrm; lexfree() removes the bias
      (when Lexadjptr is set) before releasing the memory. */

char *Lexstrings;    /* actual memory area used for the strings */
char *Lexbs;         /* Backspaces for BSD's */
char **Lexdelta;     /* Deltas for BSD's */
char (*Lexgramtab)[2]; /* di-grams */
char **Lexindextab;  /* block seed words */

   /* minimum indices into compressed data tables */

int   Lexminbsd;      /* byte code for first BSD */
int   Lexmingrm;      /* byte code for first multi-gram */

   /* miscellaneous */

HANDLE  Lexfile;        /* file access handle */
char    Lexadjptr;      /* pointers have been adjusted */
int     Lexoffset;      /* block offset to first word of lexicon */
char   *Lexblk;        /* location of decompression buffer start */
char   *Lexblkptr;     /* current location in buffer */


/* Things I coded in assembly.  They are declared without parameter lists,
   so the compiler cannot check the arguments passed to them; the notes
   below describe how this file calls them. */

extern pascal int  lexcmp();      /* called as lexcmp(word, entry); the result is tested against 0 */
extern pascal void lexstrip();    /* called with the end pointer of the word in Scdecomp */
extern pascal void lexphon();     /* called with no arguments after Lexblkptr and Scendptr are set */
extern pascal int  lexbinnext();  /* called with no arguments; a zero result ends the scan in lexfetch() */


/*
   Opening the lexicon requires several steps. They are:

   2) Open the lexicon file.

   3) Read in the header information and verify that the lexicon is
      valid for the current language and version of the spelling
      components.

   4) Allocate memory for and read in (or compute from data already
      read) the tables needed to access the lexicon.
*/

lexalloc(fname)
char    *fname;                 /* compressed lexicon to be opened */
{
extern char *lexgetstr();
char   *strings;       /* pointer to strings section */

   /* Open the lexicon file and skip the copyright message.  */

        if ((Lexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
            return (FALSE);


    if (stdseek((long)COPYRBYTES, Lexfile))
        return (FALSE);

   /* Read in the lexicon header and form the LEX structure from it.
      Then check that the lexicon's language and the current language
      match; and that the lexicon is the right version.  */

   if (bytread((char *) &Lexenvcode, 2 * LEXHEADER, Lexfile) == ERROR)
       return (FALSE);

   /* Allocate read the backspace table, gram table, and strings. */

   if (abytread(&Lexbs, (unsigned)Lexnbsd, Lexfile) == ERROR
     || abytread((char **)&Lexgramtab, (unsigned)(Lexngram << 1), Lexfile)
     == ERROR || abytread(&Lexstrings, (unsigned)Lexnstrings, Lexfile) == ERROR)
       return (FALSE);

   /* Allocate the delta and index tables and initialize them with
      pointers into the string table. */

   if (!(strings = lexgetstr(Lexstrings, &Lexdelta, Lexnbsd))
     || !lexgetstr(strings, &Lexindextab, Lexnindex))
       return (FALSE);

   /* determine offset in lexicon file of first byte of word data; this
      is the total size of the header and its tables, rounded up to the
      nearest logical block boundary.  This code only works if
      MM_BLKSIZE is a power of 2.  */

   Lexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
     + Lexnbsd + (Lexngram << 1) + Lexnstrings)
       & -MM_BLKSIZE) / MM_BLKSIZE + 1;

   /* to determine which ranges of byte will acquire the meaning "BSD
      byte" and "di-gram byte", specify the minimum value for each of
      these ranges */

   Lexminbsd = NUMBACK + NPHON;
   Lexmingrm = Lexminbsd + Lexnbsd;

   /* save an unnecessary subtraction during decompression by
      pre-decrementing the BSD and di-gram pointers. */

   Lexadjptr = TRUE;
   Lexbs -= Lexminbsd;
   Lexdelta -= Lexminbsd;
   Lexgramtab -= Lexmingrm;

   return(TRUE);
}

/* Build a table of pointers to consecutive NUL-terminated strings.

   sp  - first string in the string table.
   asp - receives the address of the newly allocated pointer table.
   len - number of strings (and therefore pointers) to record.

   Returns the address just past the terminator of the last string
   recorded, or NULL if the table could not be allocated (in which case
   *asp is left untouched). */

STATIC char *
lexgetstr(sp, asp, len)
register char *sp;             /* Points into string table. */
char   ***asp;                 /* Where to put allocated array. */
int     len;                    /* Number of pointers. */
{
register char **slot;          /* next table entry to fill */
int     remaining;             /* strings still to be recorded */

   /* One 4-byte pointer per string. */

   slot = (char **) zalloc(_SPID, len * 4);
   if (slot == NULL)
       return (NULL);

   *asp = slot;

   /* Record where each string starts, then step over it and its
      terminating NUL to reach the next one. */

   for (remaining = len; remaining > 0; --remaining)
   {
       *slot++ = sp;
       while (*sp != '\0')
           ++sp;
       ++sp;
   }

   return (sp);
}

/* Read lexicon block number Sccurblk (offset by Lexoffset to skip the
   header blocks) and remember its address in Lexblk.  Returns TRUE when
   memread() supplies a block, FALSE when it returns NULL. */

STATIC int
lexread()
{
   extern char *memread();

   Lexblk = memread(Sccurblk + Lexoffset, Lexfile, MM_READ | MM_PR2);
   if (Lexblk == NULL)
       return (FALSE);

   return (TRUE);
}

/* Free the lexicon stuff; close the file.

   If lexalloc() biased the table pointers (Lexadjptr set), the bias is
   removed first so that the addresses handed to FindHandle() are the
   ones that were originally allocated.  Lexadjptr is then cleared so
   that a later call - for example after a subsequent lexalloc() that
   fails before it re-applies the bias - does not remove the bias a
   second time from pointers that no longer carry it.

   The file is closed only when it is open and _THActive[0] is zero. */

VOID lexfree()
{
   if (Lexadjptr)
   {
       Lexgramtab += Lexmingrm;
       Lexbs += Lexminbsd;
       Lexdelta += Lexminbsd;
       Lexadjptr = 0;              /* pointers are no longer biased */
   }
   if (Lexfile && !_THActive[0])
       stdclose(Lexfile);
   DisposeHandle(FindHandle((char *) Lexstrings));
   DisposeHandle(FindHandle((char *) Lexindextab));
   DisposeHandle(FindHandle((char *) Lexgramtab));
   DisposeHandle(FindHandle((char *) Lexbs));
   DisposeHandle(FindHandle((char *) Lexdelta));
}

/* This function is passed the end of a word that has flags in it. These
   flags are stripped off and stored in the Sc vars. */

   /* Coded in assembly.  (RAH) */

/* Compare a word with a word that may have flags on the end.  The second
   argument is the one that may have the flags.  */

   /* Coded in assembly.  (RAH) */

/* Binary-search the index table to find the block where word can be found
   if it occurs in the lexicon.  The block number is not returned; it is
   stored in Sccurblk.

   The search settles on the highest index i for which
   lexcmp(word, Lexindextab[i]) >= 0, or on 0 when no entry satisfies
   that.  If Lexnindex is 0 the loop never runs and Sccurblk is set
   to -1. */

static VOID lexblknum(word)
char *word;
{
int maxval;            /* binary search limits */
int minval;            /* binary search limits */
int i;                 /* index table entry to check */

   minval = 0;
   maxval = Lexnindex - 1;
   while (minval < maxval)
   {
       /* Upper midpoint of [minval, maxval].  It is formed from the
          width of the range rather than from maxval + minval so the
          intermediate value cannot exceed maxval and overflow a
          small int. */

       i = minval + ((maxval - minval + 1) >> 1);
       if (lexcmp(word, Lexindextab[i]) >= 0)
           minval = i;
       else
           maxval = i - 1;
   }
   Sccurblk = maxval;
}

/* Decompress (to) the phonetic part of a word.  This is called for a segment
   word or after lexbinnext has processed the bs or bsd before the phonetic
   characters.  When this routine exits, the decompression pointer will
   point to the bs or bsd of the next word or to the null at the end of the
   lexicon block.  */

   /* Coded in assembly.  (RAH) */

/* Try to find word in the lexicon.

   The word is phonetically encoded by phfull() and the encoded form is
   passed to lexfetch().

   word - NUL-terminated word to look up.

   Returns the result of lexfetch() (nonzero when the encoded word was
   found, zero otherwise).  A word of LONGWORD characters or more is not
   looked up at all and is reported as not found. */

lexword(word)
register char *word;          /* The word to look up */
{
char peword[2*MAXWORD];       /* phonetically encoded form */

   /* word must fit in LONGWORD; anything longer cannot be in the
      lexicon, so give the caller a definite "not found" (FALSE). */

   if (strlen((char *)word) >= LONGWORD)
       return (0);

   /* Look up the word in the lexicon. */

   phfull(word, peword, Lexprod & SHORTLEX);

   return(lexfetch(peword));
}

/* Look up the flagged form of word in the lexicon.

   peword - the (phonetically encoded) word to search for.

   The block to search is chosen by lexblknum() (which sets Sccurblk) and
   loaded by lexread().  The segment to scan is then selected by
   decompressing the first word of successive segments, and finally that
   segment is scanned word by word with lexbinnext().

   Returns FALSE if the block cannot be read or lexbinnext() reports that
   there are no more words; otherwise returns nonzero when the word left
   in Scdecomp compares equal to peword and zero when it compares
   greater. */

lexfetch(peword)
char   *peword;                /* The word to seek to */
{
int     x;                     /* strcmp() result for the current word */
char    *blkptr;               /* walks the bytes at the start of the block */
int     seg;                   /* number of the segment currptr points to */
char    *currptr;              /* start of the segment to be scanned */
char    *nextptr;              /* start of the segment after currptr's */

   /* Point to the block that the word will be in. */

   lexblknum(peword);

   /* Read the block. */

   if (!lexread())
       return (FALSE);

   /* Find the segment which contains the query word. To do so, look
      through the segment head words to find one larger than the
      query. When that one is found, stop.  The loop also stops after
      BLOCKSEGS entries or at a zero byte in the table at the start of
      the block. */

   blkptr = nextptr = Lexblk;
   currptr = blkptr + BLOCKSEGS;
   for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
   {
       /* Make nextptr point to the start of the next segment. */

       Lexblkptr = nextptr += Lexsegwords + ctoi(*blkptr++);

       /* Decompress the first word of the segment. */

       Scendptr = Scdecomp;
       lexphon();

       /* Stop when the query word is less than (or equal to) the
          segment word; currptr is then still the previous segment. */

       if (strcmp(peword, Scdecomp) <= 0)
           break;
       currptr = nextptr;
   }

   /* Currptr now points to the offset to start searching. */

   Lexblkptr = currptr;

   /* If this is the first segment, initialize the decompression with
      the index word for the block; otherwise, initialize it by getting
      the first word from the segment.  */

   if (!seg)
   {
       Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]);
       lexstrip(Scendptr);
   }
   else
   {
       Scendptr = Scdecomp;
       lexphon();
   }

   /* Scan the segment until a word equal to or greater than
      the desired word is found.  x ends up 0 on an exact match, so
      !x is the "found" result. */

    while ((x = strcmp(peword, Scdecomp)) > 0)
        if (!lexbinnext())
            return (FALSE);
    return (!x);
}

/* Decompress the next word from the lexicon. */

    /* Coded in assembly.  (RAH) */

/* Advance to the next block of the lexicon and read it.  On success the
   decompression pointer is placed just past the first BLOCKSEGS bytes of
   the block and the block's index (seed) word is copied into Scdecomp.

   Returns FALSE when there is no further block (Sccurblk has reached
   Lexnindex) or the block cannot be read, TRUE otherwise.  Sccurblk is
   incremented in either case. */

lexnextblk()
{
   ++Sccurblk;
   if (Sccurblk >= Lexnindex)
       return (FALSE);

   if (!lexread())
       return (FALSE);

   Lexblkptr = Lexblk + BLOCKSEGS;

   /* Seed the decompression buffer with the block's index word, keep
      the pointer to its end, and hand that end pointer to lexstrip(). */

   Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]);
   lexstrip(Scendptr);

   return (TRUE);
}