2023-03-04 03:45:20 +01:00

1 line
9.6 KiB
C
Executable File

/***********************************************************************\
Filename: lex.c
\***********************************************************************/
#include <memory.h>
#include "lex.h"
#include "spmemory.h"
#include "environ.h"
#include "string.h"
#include "stdio.h"
#include "sp.h"
#include "spdef.h"
/* What used to be the lex structure - now there's only one, so I just use
vars. */
/* Defined elsewhere.  lexfree() only closes the lexicon file when
_THActive[0] is zero. */
extern int _THActive[];
/* Lexicon header fields.  lexalloc() fills these with a single bytread()
of 2 * LEXHEADER bytes starting at &Lexenvcode, so it relies on these
shorts being laid out back to back in exactly this order.  Do not reorder
them or put other variables between them.
NOTE(review): presumably LEXHEADER is the number of shorts below - confirm
against lex.h. */
short Lexenvcode; /* lexicon's environment code */
short Lexscrev; /* revision of sc compatible with lexicon */
short Lexprod; /* product code (shy, uh, etc.) */
short Lexsegwords; /* number of words in a segment */
short Lexnbsd; /* number of entries in BSD table */
short Lexngram; /* number of entries in di-gram table */
short Lexnindex; /* number of entries in the index table */
short Lexnstrings; /* size of the string area in bytes */
/* locations of tables used by decompression routines */
/* Lexbs, Lexdelta and Lexgramtab are biased downwards by lexalloc() (by
Lexminbsd / Lexmingrm) once loading succeeds; lexfree() adds the bias back
before releasing them.  While Lexadjptr is set they must not be passed to
the memory manager as they stand. */
char *Lexstrings; /* actual memory area used for the strings */
char *Lexbs; /* Backspaces for BSD's */
char **Lexdelta; /* Deltas for BSD's */
char (*Lexgramtab)[2]; /* di-grams */
char **Lexindextab; /* block seed words */
/* minimum indices into compressed data tables */
int Lexminbsd; /* byte code for first BSD (set to NUMBACK + NPHON) */
int Lexmingrm; /* byte code for first multi-gram (Lexminbsd + Lexnbsd) */
/* miscellaneous */
HANDLE Lexfile; /* file access handle */
char Lexadjptr; /* pointers have been adjusted */
int Lexoffset; /* block offset to first word of lexicon; lexread() adds it
to Sccurblk to get the block to read */
char *Lexblk; /* location of decompression buffer start (set by lexread) */
char *Lexblkptr; /* current location in buffer */
/* Things I coded in assembly */
/* These are declared without parameter lists, so the compiler cannot check
the arguments passed to them. */
extern pascal int lexcmp();
extern pascal void lexstrip();
extern pascal void lexphon();
extern pascal int lexbinnext();
/*
Opening the lexicon requires several steps. They are:
2) Open the lexicon file.
3) Read in the header information and verify that the lexicon is
valid for the current language and version of the spelling
components.
4) Allocate memory for and read in (or compute from data already
read) the tables needed to access the lexicon.
*/
lexalloc(fname)
char *fname; /* compressed lexicon to be opened */
{
extern char *lexgetstr();
char *strings; /* pointer to strings section */
/* Open the lexicon file and skip the copyright message. */
if ((Lexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
return (FALSE);
if (stdseek((long)COPYRBYTES, Lexfile))
return (FALSE);
/* Read in the lexicon header and form the LEX structure from it.
Then check that the lexicon's language and the current language
match; and that the lexicon is the right version. */
if (bytread((char *) &Lexenvcode, 2 * LEXHEADER, Lexfile) == ERROR)
return (FALSE);
/* Allocate read the backspace table, gram table, and strings. */
if (abytread(&Lexbs, (unsigned)Lexnbsd, Lexfile) == ERROR
|| abytread((char **)&Lexgramtab, (unsigned)(Lexngram << 1), Lexfile)
== ERROR || abytread(&Lexstrings, (unsigned)Lexnstrings, Lexfile) == ERROR)
return (FALSE);
/* Allocate the delta and index tables and initialize them with
pointers into the string table. */
if (!(strings = lexgetstr(Lexstrings, &Lexdelta, Lexnbsd))
|| !lexgetstr(strings, &Lexindextab, Lexnindex))
return (FALSE);
/* determine offset in lexicon file of first byte of word data; this
is the total size of the header and its tables, rounded up to the
nearest logical block boundary. This code only works if
MM_BLKSIZE is a power of 2. */
Lexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
+ Lexnbsd + (Lexngram << 1) + Lexnstrings)
& -MM_BLKSIZE) / MM_BLKSIZE + 1;
/* to determine which ranges of byte will acquire the meaning "BSD
byte" and "di-gram byte", specify the minimum value for each of
these ranges */
Lexminbsd = NUMBACK + NPHON;
Lexmingrm = Lexminbsd + Lexnbsd;
/* save an unnecessary subtraction during decompression by
pre-decrementing the BSD and di-gram pointers. */
Lexadjptr = TRUE;
Lexbs -= Lexminbsd;
Lexdelta -= Lexminbsd;
Lexgramtab -= Lexmingrm;
return(TRUE);
}
/* Build a table of string pointers for strings read from the lexicon.
   Starting at sp, the string area is treated as len consecutive
   NUL-terminated strings; a table of len pointers is allocated with
   zalloc() and entry i is set to the start of string i.  The table
   address is stored through asp.
   Returns the address just past the terminator of the last string (where
   the next group of strings begins), or NULL if the table could not be
   allocated.
   NOTE(review): the walk trusts the file to contain len terminated
   strings; nothing stops it running past the end of the string area. */
STATIC char *
lexgetstr(sp, asp, len)
register char *sp; /* Points into string table. */
char ***asp; /* Where to put allocated array. */
int len; /* Number of pointers. */
{
    register char **tp;

    /* Allocate the space for the table.  The size is one pointer per
       entry rather than a hard-coded 4 bytes; the cast keeps the argument
       an int because zalloc() is called without a prototype. */
    tp = (char **) zalloc(_SPID, len * (int) sizeof(char *));
    if (!tp)
        return (NULL);
    *asp = tp; /* Return the table address. */

    /* Record the start of each string, then skip past its terminator. */
    while (--len >= 0)
    {
        *tp++ = sp;
        while (*sp++)
            ;
    }
    return (sp); /* Return the next string address. */
}
/* Fetch the current lexicon block.  The block to read is Sccurblk plus
   Lexoffset; the buffer address returned by memread() is left in Lexblk.
   Returns TRUE when a buffer was obtained, FALSE when memread() gave back
   NULL. */
STATIC int
lexread()
{
    extern char *memread();

    Lexblk = memread(Sccurblk + Lexoffset, Lexfile, MM_READ | MM_PR2);
    if (Lexblk != NULL)
        return (TRUE);
    return (FALSE);
}
/* Free the lexicon stuff; close the file. */
VOID lexfree()
{
if (Lexadjptr)
{
Lexgramtab += Lexmingrm;
Lexbs += Lexminbsd;
Lexdelta += Lexminbsd;
}
if (Lexfile && !_THActive[0])
stdclose(Lexfile);
DisposeHandle(FindHandle((char *) Lexstrings));
DisposeHandle(FindHandle((char *) Lexindextab));
DisposeHandle(FindHandle((char *) Lexgramtab));
DisposeHandle(FindHandle((char *) Lexbs));
DisposeHandle(FindHandle((char *) Lexdelta));
}
/* lexstrip: this function is passed the end of a word that has flags in
it. These flags are stripped off and stored in the Sc vars. */
/* Coded in assembly. (RAH) */
/* lexcmp: compare a word with a word that may have flags on the end. The
second argument is the one that may have the flags. */
/* Coded in assembly. (RAH) */
/* Binary-search the index table to find the block where word can be found
   if it occurs in the lexicon.  Nothing is returned; the resulting block
   number is stored in Sccurblk.
   The search keeps the last index entry that compares <= word (using
   lexcmp, whose second argument may carry flags).
   NOTE(review): with an empty index table (Lexnindex == 0) Sccurblk ends
   up as -1. */
static VOID lexblknum(word)
char *word;
{
    int lo; /* lowest candidate entry */
    int hi; /* highest candidate entry */
    int mid; /* index table entry to check */

    lo = 0;
    hi = Lexnindex - 1;
    while (lo < hi)
    {
        /* Upper midpoint, so that "lo = mid" always makes progress.
           Written as an offset from lo so the sum cannot overflow a
           16-bit int; it equals (hi + lo + 1) >> 1. */
        mid = lo + ((hi - lo + 1) >> 1);
        if (lexcmp(word, Lexindextab[mid]) >= 0)
            lo = mid;
        else
            hi = mid - 1;
    }
    Sccurblk = hi;
}
/* lexphon: decompress (to) the phonetic part of a word. This is called for
a segment word or after lexbinnext has processed the bs or bsd before the
phonetic characters. When this routine exits, the decompression pointer
will point to the bs or bsd of the next word or to the null at the end of
the lexicon block. */
/* Coded in assembly. (RAH) */
/* Try to find word in the lexicon. Return the flags as found in the
lex. If the word is not found the the first character of the flags will
be null. */
lexword(word)
register char *word; /* The word to look up */
{
char peword[2*MAXWORD]; /* phonetically encoded form */
/* word must fit in LONGWORD */
if (strlen((char *)word) >= LONGWORD)
return;
/* Look up the word in the lexicon. */
phfull(word, peword, Lexprod & SHORTLEX);
return(lexfetch(peword));
}
/* Look up the flagged form of word in the lexicon.
   peword is the encoded word to seek to.  The block that could hold it is
   chosen with lexblknum() and loaded with lexread(); the segment within
   the block is found by decompressing segment head words; the segment is
   then scanned word by word.
   Returns non-zero when a decompressed word equals peword; returns FALSE
   when the block cannot be read, when lexbinnext() reports no more words,
   or when the scan reaches a word greater than peword.
   The assembly helpers lexphon(), lexstrip() and lexbinnext() take their
   input from the globals Lexblkptr and Scendptr, so the order of the
   assignments below matters. */
int
lexfetch(peword)
char *peword; /* The word to seek to */
{
    int x; /* result of the last comparison against Scdecomp */
    char *blkptr; /* walks the per-segment bytes at the start of the block */
    int seg; /* number of segment heads examined before stopping */
    char *currptr; /* start of the segment to be searched */
    char *nextptr; /* start of the segment after it */
    char segbyte; /* current per-segment byte */

    /* Point to the block that the word will be in. */
    lexblknum(peword);

    /* Read the block. */
    if (!lexread())
        return (FALSE);

    /* Find the segment which contains the query word. To do so, look
       through the segment head words to find one larger than the
       query. When that one is found, stop. */
    blkptr = nextptr = Lexblk;
    currptr = blkptr + BLOCKSEGS;
    for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
    {
        /* Make nextptr point to the start of the next segment.  The byte
           is fetched into a local first so the increment of blkptr is not
           buried inside the ctoi() argument (safe even if ctoi is a macro
           that evaluates its argument more than once). */
        segbyte = *blkptr++;
        nextptr += Lexsegwords + ctoi(segbyte);
        Lexblkptr = nextptr;

        /* Decompress the first word of the segment. */
        Scendptr = Scdecomp;
        lexphon();

        /* Stop when the query word is less than the segment word. */
        if (strcmp(peword, Scdecomp) <= 0)
            break;
        currptr = nextptr;
    }

    /* Currptr now points to the offset to start searching. */
    Lexblkptr = currptr;

    /* If this is the first segment, initialize the decompression with
       the index word for the block; otherwise, initialize it by getting
       the first word from the segment. */
    if (!seg)
    {
        Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]);
        lexstrip(Scendptr);
    }
    else
    {
        Scendptr = Scdecomp;
        lexphon();
    }

    /* Scan the segment until a word equal to or greater than
       the desired word is found. */
    while ((x = strcmp(peword, Scdecomp)) > 0)
    {
        if (!lexbinnext())
            return (FALSE);
    }

    /* x == 0 means an exact match. */
    return (!x);
}
/* lexbinnext: decompress the next word from the lexicon. */
/* Coded in assembly. (RAH) */
/* Find and read the next block of the lexicon and prime the
   decompression state with that block's seed word.
   Sccurblk is advanced first (and stays advanced even when the call
   fails).  On success Lexblkptr points BLOCKSEGS bytes into the new
   block, Scdecomp holds the block's index word with its flags stripped by
   lexstrip(), and Scendptr holds the end pointer returned by strecpy().
   Returns TRUE on success; FALSE when there are no more blocks or the
   block cannot be read. */
int
lexnextblk()
{
    if (++Sccurblk >= Lexnindex)
        return (FALSE);
    if (!lexread())
        return (FALSE);

    Lexblkptr = Lexblk + BLOCKSEGS;

    /* copy in new seed word, remember its end, then strip its flags */
    Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]);
    lexstrip(Scendptr);
    return (TRUE);
}