antoine-source/appleworksgs/Spell/Src/TLEX.C

/***********************************************************************\

   Filename: Tlex.c

\***********************************************************************/

#include <memory.h>
#include "tlex.h"
#include "spmemory.h"
#include "tenviron.h"
#include "string.h"
#include "stdio.h"
#include "th.h"

/* What used to be the Tlex structure - now there's only one, so I just use
   vars. */

/* Tlexfree only closes the lexicon file when _SPActive[0] is zero. */

extern int _SPActive[];

/* Lexicon header fields.  Tlexalloc reads 2 * LEXHEADER bytes from the file
   straight into memory starting at &Tlexenvcode, so these shorts must stay
   in this order with nothing declared between them; this relies on the
   compiler laying them out contiguously.
   NOTE(review): presumably LEXHEADER is the number of shorts below --
   confirm against tlex.h. */

short Tlexenvcode;     /* lexicon's environment code */
short Tlexscrev;       /* revision of sc compatible with lexicon */
short Tlexprod;        /* product code (shy, uh, etc.) */
short Tlexsegwords;    /* number of words in a segment */
short Tlexnbsd;        /* number of entries in BSD table */
short Tlexngram;       /* number of entries in di-gram table */
short Tlexnindex;      /* number of entries in the index table */
short Tlexnstrings;    /* size of the string area in bytes */

   /* locations of tables used by decompression routines; Tlexbs,
      Tlexdelta and Tlexgramtab are biased by Tlexalloc (see Tlexadjptr) */

char *Tlexstrings;    /* actual memory area used for the strings */
char *Tlexbs;         /* Backspaces for BSD's */
char **Tlexdelta;     /* Deltas for BSD's */
char (*Tlexgramtab)[2]; /* di-grams */
char **Tlexindextab;  /* block seed words */

   /* minimum indices into compressed data tables */

int   Tlexminbsd;      /* byte code for first BSD */
int   Tlexmingrm;      /* byte code for first multi-gram */

   /* miscellaneous */


HANDLE  Tlexfile;        /* file access handle */
char    Tlexadjptr;      /* TRUE once Tlexalloc has biased the table
                            pointers; Tlexfree undoes the bias when set */
int     Tlexoffset;      /* block offset to first word of lexicon; added
                            to Tsccurblk by Tlexread */
char    *Tlexblk;        /* location of decompression buffer start */
char    *Tlexblkptr;     /* current location in buffer */

/* Things I coded in assembly.  The commented-out declarations are for
   routines that are defined in C in this file instead. */

extern pascal int  Tlexcmp();
/*extern pascal void Tlexstrip();*/
extern pascal void Tlexphon();
/*extern pascal int  Tlexbinnext();*/
/*extern pascal int  Tlexbinnext();*/

/* Split trailing flags off a decompressed word.

   eptr points to the end of the string.  Flags sit at the end of the word
   as two-byte pairs whose first byte is FLAGSIND.  The word is terminated
   in place at the first indicator byte.  Flag bytes with the IW_TAG bit
   set are stored in Tsctag; all others are copied to Tscfdecomp, which is
   left null terminated.  A word with no flags, or with only a tag, gets
   the single flag IW_COMMON.  */

VOID Tlexstrip(eptr)
 char *eptr;   /* Points to the end of the string */
{
 char *out;    /* Next free slot in the flag buffer */
 char *scan;   /* Walks the flag pairs at the end of the word */

   out = Tscfdecomp;
   Tsctag = 0;
   scan = eptr - 2;

   /* No indicator in the last pair: the word carries no flags, so
      report the common flag only. */

   if (ctoi(*scan) != FLAGSIND)
   {
       out[0] = IW_COMMON;
       out[1] = 0;
       return;
   }

   /* Back up one pair at a time until the byte is not an indicator,
      then step forward again onto the first indicator. */

   do
   {
       scan -= 2;
   }
   while (ctoi(*scan) == FLAGSIND);
   scan += 2;

   /* Terminate the word where the flags begin and move on to the
      first flag byte. */

   *scan = 0;
   scan++;

   /* Each pass handles one flag byte, then looks at the byte after it:
      a non-zero byte is the next indicator, zero ends the string. */

   for (;;)
   {
       if (*scan & IW_TAG)
           Tsctag = ctoi(*scan);
       else
           *out++ = *scan;
       scan++;

       if (!*scan++)
           break;
   }
   *out = 0;

   /* Nothing but a tag was present: fall back to the common flag. */

   if (Tscfdecomp[0] == 0)
   {
       out[0] = IW_COMMON;
       out[1] = 0;
   }
}

/*
   Opening the lexicon requires several steps. They are:

   2) Open the lexicon file.

   3) Read in the header information and verify that the lexicon is
      valid for the current language and version of the spelling
      components.

   4) Allocate memory for and read in (or compute from data already
      read) the tables needed to access the lexicon.
*/

Tlexalloc(fname)
char    *fname;                 /* compressed lexicon to be opened */
{
extern char *Tlexgetstr();
char   *strings;       /* pointer to strings section */

    /* Open the lexicon file and skip the copyright message.  */

        if ((Tlexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
            return (FALSE);

    if (stdseek((long)COPYRBYTES, Tlexfile))
        return (FALSE);

   /* Read in the lexicon header and form the LEX structure from it.
      Then check that the lexicon's language and the current language
      match; and that the lexicon is the right version.  */

   if (bytread((char *) &Tlexenvcode, 2 * LEXHEADER, Tlexfile) == ERROR)
       return (FALSE);

   /* Allocate read the backspace table, gram table, and strings. */

   if (abytread(&Tlexbs, (unsigned)Tlexnbsd, Tlexfile) == ERROR
     || abytread((char **)&Tlexgramtab, (unsigned)(Tlexngram << 1), Tlexfile)
     == ERROR || abytread(&Tlexstrings, (unsigned)Tlexnstrings, Tlexfile) == ERROR)
       return (FALSE);

   /* Allocate the delta and index tables and initialize them with
      pointers into the string table. */

   if (!(strings = Tlexgetstr(Tlexstrings, &Tlexdelta, Tlexnbsd))
     || !Tlexgetstr(strings, &Tlexindextab, Tlexnindex))
       return (FALSE);

   /* determine offset in lexicon file of first byte of word data; this
      is the total size of the header and its tables, rounded up to the
      nearest logical block boundary.  This code only works if
      MM_BLKSIZE is a power of 2.  */

   Tlexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
     + Tlexnbsd + (Tlexngram << 1) + Tlexnstrings)
       & -MM_BLKSIZE) / MM_BLKSIZE + 1;

   /* to determine which ranges of byte will acquire the meaning "BSD
      byte" and "di-gram byte", specify the minimum value for each of
      these ranges */

   Tlexminbsd = NUMBACK + NPHON;
   Tlexmingrm = Tlexminbsd + Tlexnbsd;

   /* save an unnecessary subtraction during decompression by
      pre-decrementing the BSD and di-gram pointers. */

   Tlexadjptr = TRUE;
   Tlexbs -= Tlexminbsd;
   Tlexdelta -= Tlexminbsd;
   Tlexgramtab -= Tlexmingrm;

   return(TRUE);
}

/* Build a table of pointers to len consecutive null-terminated strings
   starting at sp.  The table is allocated with zalloc and its address is
   stored through asp.  Returns the address just past the last string
   visited, or NULL if the allocation fails.  */

STATIC char *
Tlexgetstr(sp, asp, len)
char *sp;                   /* Points into string table. */
char   ***asp;              /* Where to put allocated array. */
int     len;                    /* Number of pointers. */
{
char **table;
int    i;

    table = (char **) zalloc(_THID, len * sizeof(char *));
    if (table == NULL)
        return (NULL);

    /* Hand the table back to the caller before filling it in. */

    *asp = table;

    for (i = 0; i < len; i++)
    {
        table[i] = sp;

        /* Skip to this string's terminator, then past it. */

        while (*sp != 0)
            sp++;
        sp++;
    }

    return (sp);
}

/* Read lexicon block Tsccurblk (biased by Tlexoffset) through memread and
   remember the buffer in Tlexblk.  Returns TRUE when memread supplied a
   buffer and FALSE when it returned NULL.  */

STATIC int
Tlexread()
{
   extern char *memread();

   Tlexblk = memread(Tsccurblk + Tlexoffset, Tlexfile, MM_READ | MM_PR2);

   return ((Tlexblk == NULL) ? FALSE : TRUE);
}

/* Free the lexicon stuff; close the file. */

VOID Tlexfree()
{
   if (Tlexadjptr)
   {
       Tlexgramtab += Tlexmingrm;
       Tlexbs += Tlexminbsd;
       Tlexdelta += Tlexminbsd;
   }
   if (Tlexfile && !_SPActive[0])
       stdclose(Tlexfile);
   nzfree(Tlexstrings);
   nzfree(Tlexindextab);
   nzfree(Tlexgramtab);
   nzfree(Tlexbs);
   nzfree(Tlexdelta);
}

/* Tlexstrip is passed the end of a word that has flags in it.  These
   flags are stripped off and stored in the Sc vars. */

   /* Originally coded in assembly (RAH); the C version, Tlexstrip(), is
      defined near the top of this file. */

/* Compare a word with a word that may have flags on the end.  The second
   argument is the one that may have the flags.  */

   /* Coded in assembly.  (RAH) */

/* Binary-search the index table for the block where word would be found
   if it occurs in the lexicon: the last entry that Tlexcmp reports as not
   greater than word (entry 0 if every entry is greater).  The resulting
   block number is stored in Tsccurblk; nothing is returned.  */

void Tlexblknum(word)
char *word;
{
int lo;                /* lowest candidate block */
int hi;                /* highest candidate block */
int mid;               /* index table entry being probed */

   /* The probe rounds up so that "lo = mid" always makes progress. */

   for (lo = 0, hi = Tlexnindex - 1; lo < hi; )
   {
       mid = (hi + lo + 1) >> 1;
       if (Tlexcmp(word, Tlexindextab[mid]) < 0)
           hi = mid - 1;
       else
           lo = mid;
   }

   Tsccurblk = hi;
}

/* Decompress (to) the phonetic part of a word.  This is called for a segment
   word or after lexbinnext has processed the bs or bsd before the phonetic
   characters.  When this routine exits, the decompression pointer will
   point to the bs or bsd of the next word or to the null at the end of the
   lexicon block.  */

   /* Coded in assembly.  (RAH) */

/* Try to find word in the lexicon.  Return the flags as found in the
   lex.  If the word is not found the the first character of the flags will
   be null.  */

Tlexword(word)
char *word;          /* The word to look up */
{
char peword[2*MAXWORD];       /* phonetically encoded form */

   /* word must fit in LONGWORD */

   if (strlen((char *)word) >= LONGWORD)
   {
       return;
   }

   /* Look up the word in the lexicon. */

   phfull(word, peword, Tlexprod & SHORTLEX);

   return(Tlexfetch(peword));
}

/* Look up the flagged form of word in the lexicon.

   Selects a block with Tlexblknum, reads it with Tlexread, picks a
   segment by comparing peword against decompressed segment head words,
   then steps forward with Tlexbinnext until the word in Tscdecomp is no
   longer less than peword.

   Returns nonzero when Tscdecomp compares equal to peword.  Returns FALSE
   when the block cannot be read, when Tlexbinnext fails, or when the scan
   stops on a word greater than peword.  */

Tlexfetch(peword)
char   *peword;                /* The word to seek to */
{
int     x;                     /* result of the last strcmp */
char    *blkptr;               /* walks the per-segment bytes at block start */
int     seg;                   /* segment counter; 0 means first segment */
char    *currptr;              /* start of the segment to scan */
char    *nextptr;              /* start of the following segment */

   /* Point to the block that the word will be in. */

   Tlexblknum(peword);

   /* Read the block. */

   if (!Tlexread())
       return (FALSE);

   /* Find the segment which contains the query word. To do so, look
      through the segment head words to find one larger than the
      query. When that one is found, stop.  The loop also stops after
      BLOCKSEGS entries or at a zero segment byte. */

   blkptr = nextptr = Tlexblk;
   currptr = blkptr + BLOCKSEGS;
   for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
   {
       /* Make nextptr point to the start of the next segment. */

       Tlexblkptr = nextptr += Tlexsegwords + ctoi(*blkptr++);

       /* Decompress the first word of the segment.  Tlexphon is an
          assembly routine; presumably it reads from Tlexblkptr and
          writes at Tscendptr -- NOTE(review): confirm. */

       Tscendptr = Tscdecomp;
       Tlexphon();

       /* Stop when the query word is less than the segment word. */

       if (strcmp(peword, Tscdecomp) <= 0)
           break;
       currptr = nextptr;
   }

   /* Currptr now points to the offset to start searching. */

   Tlexblkptr = currptr;

   /* If this is the first segment, initialize the decompression with
      the index word for the block; otherwise, initialize it by getting
      the first word from the segment.  */

   if (!seg)
   {
       Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]);
       Tlexstrip(Tscendptr);
   }
   else
   {
       Tscendptr = Tscdecomp;
       Tlexphon();
   }

   /* Scan the segment until a word equal to or greater than
      the desired word is found.  x is zero only on an exact match, so
      !x is the found/not-found result. */

    while ((x = strcmp(peword, Tscdecomp)) > 0)
        if (!Tlexbinnext())
            return (FALSE);
    return (!x);
}

/* Decompress the next word from the lexicon. */

    /* Originally coded in assembly (RAH); the C version, Tlexbinnext(),
       is defined below. */

/* Advance to the next block of the lexicon and read it.  The block's seed
   word from the index table is copied into Tscdecomp and its flags are
   split off with Tlexstrip.  Returns FALSE when there is no further block
   or the read fails, TRUE otherwise.  */

Tlexnextblk()
{
   /* The block number is bumped even when it runs off the end. */

   ++Tsccurblk;
   if (Tsccurblk >= Tlexnindex)
       return (FALSE);

   if (!Tlexread())
       return (FALSE);

   /* Word data starts BLOCKSEGS bytes into the block. */

   Tlexblkptr = Tlexblk + BLOCKSEGS;

   /* Seed the decompression buffer and remember where the word ends. */

   Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]);
   Tlexstrip(Tscendptr);

   return (TRUE);
}

Tlexbinnext()
{
int cc;
char *blkptr;

   /* If at the end of the current block, read the next. */

   blkptr = Tlexblkptr;
   if ((cc = ctoi(*blkptr++)) == 0)
       return (Tlexnextblk());

   /* Apply the backspace or bsd. */

   if (cc < Tlexminbsd)
   {
       Tscendptr -= cc - NPHON;

       /* look for dual backspace sequence */

       if (cc == NPHON + (NUMBACK - 1))
           Tscendptr -= ctoi(*blkptr++) - NPHON;
   }
   else
        Tscendptr = strecpy(Tscendptr-Tlexbs[cc], Tlexdelta[cc]);
   Tlexblkptr = blkptr;

   /* Get the phonetic part for the word. */

   Tlexphon();

   return (TRUE);
}