mirror of
https://github.com/antoinevignau/source.git
synced 2025-01-22 14:30:24 +00:00
1 line
9.6 KiB
C
Executable File
1 line
9.6 KiB
C
Executable File
/***********************************************************************\
|
|
|
|
Filename: lex.c
|
|
|
|
\***********************************************************************/
|
|
|
|
#include <memory.h>
|
|
#include "lex.h"
|
|
#include "spmemory.h"
|
|
#include "environ.h"
|
|
#include "string.h"
|
|
#include "stdio.h"
|
|
#include "sp.h"
|
|
#include "spdef.h"
|
|
|
|
/* What used to be the lex structure - now there's only one, so I just use
|
|
vars. */
|
|
|
|
/* Declared elsewhere.  In this file it is only consulted by lexfree(),
   which closes the lexicon file only when _THActive[0] is zero. */
extern int _THActive[];
|
|
|
|
/* Lexicon header fields.  lexalloc() fills these with one bytread() of
   2 * LEXHEADER bytes starting at &Lexenvcode, so it appears to rely on
   these shorts being laid out contiguously in exactly this declaration
   order.  NOTE(review): confirm LEXHEADER matches the number of shorts
   below before reordering, inserting or removing any of them. */
short Lexenvcode; /* lexicon's environment code */
|
|
short Lexscrev; /* revision of sc compatible with lexicon */
|
|
short Lexprod; /* product code (shy, uh, etc.) */
|
|
short Lexsegwords; /* number of words in a segment */
|
|
short Lexnbsd; /* number of entries in BSD table */
|
|
short Lexngram; /* number of entries in di-gram table */
|
|
short Lexnindex; /* number of entries in the index table */
|
|
short Lexnstrings; /* size of the string area in bytes */
|
|
|
|
/* locations of tables used by decompression routines */
/* After a successful lexalloc(), Lexbs and Lexdelta have had Lexminbsd
   subtracted and Lexgramtab has had Lexmingrm subtracted; lexfree() adds
   the same amounts back before releasing them. */
|
|
|
|
char *Lexstrings; /* actual memory area used for the strings */
|
|
char *Lexbs; /* Backspaces for BSD's */
|
|
char **Lexdelta; /* Deltas for BSD's */
|
|
char (*Lexgramtab)[2]; /* di-grams */
|
|
char **Lexindextab; /* block seed words */
|
|
|
|
/* minimum indices into compressed data tables */
/* Both are computed in lexalloc(): Lexminbsd = NUMBACK + NPHON and
   Lexmingrm = Lexminbsd + Lexnbsd. */
|
|
|
|
int Lexminbsd; /* byte code for first BSD */
|
|
int Lexmingrm; /* byte code for first multi-gram */
|
|
|
|
/* miscellaneous */
|
|
|
|
HANDLE Lexfile; /* file access handle */
|
|
/* Set to TRUE by lexalloc() once the table pointers above have been
   biased; tested by lexfree() to decide whether to undo the bias. */
char Lexadjptr; /* pointers have been adjusted */
|
|
/* Added to Sccurblk by lexread() to form the block number to read. */
int Lexoffset; /* block offset to first word of lexicon */
|
|
/* Assigned by lexread() from the result of memread(). */
char *Lexblk; /* location of decompression buffer start */
|
|
/* Set by lexfetch() and lexnextblk() before the assembly decompression
   helpers are called. */
char *Lexblkptr; /* current location in buffer */
|
|
|
|
|
|
/* Things I coded in assembly */
/* Usage visible in this file: lexcmp(word, indexentry) yields a value
   compared against zero by lexblknum(); lexstrip() is passed the end
   pointer of the word in Scdecomp; lexphon() is called with no arguments
   after Lexblkptr and Scendptr are set; lexfetch() gives up when
   lexbinnext() returns zero. */
|
|
|
|
extern pascal int lexcmp();
|
|
extern pascal void lexstrip();
|
|
extern pascal void lexphon();
|
|
extern pascal int lexbinnext();
|
|
|
|
|
|
/*
Opening the lexicon requires several steps. They are:

1) Open the lexicon file.

2) Read in the header information and verify that the lexicon is
valid for the current language and version of the spelling
components.

3) Allocate memory for and read in (or compute from data already
read) the tables needed to access the lexicon.
*/
|
|
|
|
lexalloc(fname)
|
|
char *fname; /* compressed lexicon to be opened */
|
|
{
|
|
extern char *lexgetstr();
|
|
char *strings; /* pointer to strings section */
|
|
|
|
/* Open the lexicon file and skip the copyright message. */
|
|
|
|
if ((Lexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
|
|
return (FALSE);
|
|
|
|
|
|
if (stdseek((long)COPYRBYTES, Lexfile))
|
|
return (FALSE);
|
|
|
|
/* Read in the lexicon header and form the LEX structure from it.
|
|
Then check that the lexicon's language and the current language
|
|
match; and that the lexicon is the right version. */
|
|
|
|
if (bytread((char *) &Lexenvcode, 2 * LEXHEADER, Lexfile) == ERROR)
|
|
return (FALSE);
|
|
|
|
/* Allocate read the backspace table, gram table, and strings. */
|
|
|
|
if (abytread(&Lexbs, (unsigned)Lexnbsd, Lexfile) == ERROR
|
|
|| abytread((char **)&Lexgramtab, (unsigned)(Lexngram << 1), Lexfile)
|
|
== ERROR || abytread(&Lexstrings, (unsigned)Lexnstrings, Lexfile) == ERROR)
|
|
return (FALSE);
|
|
|
|
/* Allocate the delta and index tables and initialize them with
|
|
pointers into the string table. */
|
|
|
|
if (!(strings = lexgetstr(Lexstrings, &Lexdelta, Lexnbsd))
|
|
|| !lexgetstr(strings, &Lexindextab, Lexnindex))
|
|
return (FALSE);
|
|
|
|
/* determine offset in lexicon file of first byte of word data; this
|
|
is the total size of the header and its tables, rounded up to the
|
|
nearest logical block boundary. This code only works if
|
|
MM_BLKSIZE is a power of 2. */
|
|
|
|
Lexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
|
|
+ Lexnbsd + (Lexngram << 1) + Lexnstrings)
|
|
& -MM_BLKSIZE) / MM_BLKSIZE + 1;
|
|
|
|
/* to determine which ranges of byte will acquire the meaning "BSD
|
|
byte" and "di-gram byte", specify the minimum value for each of
|
|
these ranges */
|
|
|
|
Lexminbsd = NUMBACK + NPHON;
|
|
Lexmingrm = Lexminbsd + Lexnbsd;
|
|
|
|
/* save an unnecessary subtraction during decompression by
|
|
pre-decrementing the BSD and di-gram pointers. */
|
|
|
|
Lexadjptr = TRUE;
|
|
Lexbs -= Lexminbsd;
|
|
Lexdelta -= Lexminbsd;
|
|
Lexgramtab -= Lexmingrm;
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/* Function to assign string pointers for strings read from the lexicon. */
|
|
|
|
STATIC char *
|
|
lexgetstr(sp, asp, len)
|
|
register char *sp; /* Points into string table. */
|
|
char ***asp; /* Where to put allocated array. */
|
|
int len; /* Number of pointers. */
|
|
{
|
|
register char **tp;
|
|
|
|
/* Allocate the space for the tables. */
|
|
|
|
if (!(tp = (char **) zalloc(_SPID, len * 4)))
|
|
return (NULL);
|
|
|
|
*asp = tp; /* Return the table address. */
|
|
|
|
/* Set the pointer address. */
|
|
|
|
while (--len >= 0)
|
|
{
|
|
*tp++ = sp;
|
|
while (*sp++)
|
|
;
|
|
}
|
|
|
|
return (sp); /* Return the next string address. */
|
|
}
|
|
|
|
/* Read a lexicon block from the current lexicon. */
|
|
|
|
STATIC int
|
|
lexread()
|
|
{
|
|
extern char *memread();
|
|
|
|
if ((Lexblk = memread(Sccurblk + Lexoffset, Lexfile, MM_READ | MM_PR2)) ==
|
|
NULL)
|
|
return (FALSE);
|
|
return (TRUE);
|
|
}
|
|
|
|
/* Free the lexicon stuff; close the file. */
|
|
|
|
VOID lexfree()
|
|
{
|
|
if (Lexadjptr)
|
|
{
|
|
Lexgramtab += Lexmingrm;
|
|
Lexbs += Lexminbsd;
|
|
Lexdelta += Lexminbsd;
|
|
}
|
|
if (Lexfile && !_THActive[0])
|
|
stdclose(Lexfile);
|
|
DisposeHandle(FindHandle((char *) Lexstrings));
|
|
DisposeHandle(FindHandle((char *) Lexindextab));
|
|
DisposeHandle(FindHandle((char *) Lexgramtab));
|
|
DisposeHandle(FindHandle((char *) Lexbs));
|
|
DisposeHandle(FindHandle((char *) Lexdelta));
|
|
}
|
|
|
|
/* This function is passed the end of a word that has flags in it. These
|
|
flags are stripped off and stored in the Sc vars. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Compare a word with a word that may have flags on the end. The second
|
|
argument is the one that may have the flags. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Binary-search the index table to find the block where word can be found
if it occurs in the lexicon. Store the resulting block number in
Sccurblk. */
|
|
|
|
static VOID lexblknum(word)
|
|
char *word;
|
|
{
|
|
int maxval; /* binary search limits */
|
|
int minval; /* binary search limits */
|
|
int i; /* index table entry to check */
|
|
|
|
minval = 0;
|
|
maxval = Lexnindex - 1;
|
|
while (minval < maxval)
|
|
{
|
|
i = (maxval + minval + 1) >> 1;
|
|
if (lexcmp(word, Lexindextab[i]) >= 0)
|
|
minval = i;
|
|
else
|
|
maxval = i - 1;
|
|
}
|
|
Sccurblk = maxval;
|
|
}
|
|
|
|
/* Decompress (to) the phonetic part of a word. This is called for a segment
|
|
word or after lexbinnext has processed the bs or bsd before the phonetic
|
|
characters. When this routine exits, the decompression pointer will
|
|
point to the bs or bsd of the next word or to the null at the end of the
|
|
lexicon block. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Try to find word in the lexicon. Return the flags as found in the
lex. If the word is not found then the first character of the flags will
be null. */
|
|
|
|
lexword(word)
|
|
register char *word; /* The word to look up */
|
|
{
|
|
char peword[2*MAXWORD]; /* phonetically encoded form */
|
|
|
|
/* word must fit in LONGWORD */
|
|
|
|
if (strlen((char *)word) >= LONGWORD)
|
|
return;
|
|
|
|
/* Look up the word in the lexicon. */
|
|
|
|
phfull(word, peword, Lexprod & SHORTLEX);
|
|
|
|
return(lexfetch(peword));
|
|
}
|
|
|
|
/* Look up the flagged form of word in the lexicon. */
|
|
|
|
lexfetch(peword)
|
|
char *peword; /* The word to seek to */
|
|
{
|
|
int x;
|
|
char *blkptr;
|
|
int seg;
|
|
char *currptr;
|
|
char *nextptr;
|
|
|
|
/* Point to the block that the word will be in. */
|
|
|
|
lexblknum(peword);
|
|
|
|
/* Read the block. */
|
|
|
|
if (!lexread())
|
|
return (FALSE);
|
|
|
|
/* Find the segment which contains the query word. To do so, look
|
|
through the segment head words to find one larger than the
|
|
query. When that one is found, stop. */
|
|
|
|
blkptr = nextptr = Lexblk;
|
|
currptr = blkptr + BLOCKSEGS;
|
|
for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
|
|
{
|
|
/* Make nextptr point to the start of the next segment. */
|
|
|
|
Lexblkptr = nextptr += Lexsegwords + ctoi(*blkptr++);
|
|
|
|
/* Decompress the first word of the segment. */
|
|
|
|
Scendptr = Scdecomp;
|
|
lexphon();
|
|
|
|
/* Stop when the query word is less than the segment word. */
|
|
|
|
if (strcmp(peword, Scdecomp) <= 0)
|
|
break;
|
|
currptr = nextptr;
|
|
}
|
|
|
|
/* Currptr now points to the offset to start searching. */
|
|
|
|
Lexblkptr = currptr;
|
|
|
|
/* If this is the first segment, initialize the decompression with
|
|
the index word for the block; otherwise, initialize it by getting
|
|
the first word from the segment. */
|
|
|
|
if (!seg)
|
|
{
|
|
Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]);
|
|
lexstrip(Scendptr);
|
|
}
|
|
else
|
|
{
|
|
Scendptr = Scdecomp;
|
|
lexphon();
|
|
}
|
|
|
|
/* Scan the segment until a word equal to or greater than
|
|
the desired word is found. */
|
|
|
|
while ((x = strcmp(peword, Scdecomp)) > 0)
|
|
if (!lexbinnext())
|
|
return (FALSE);
|
|
return (!x);
|
|
}
|
|
|
|
/* Decompress the next word from the lexicon. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Find and read the next block of the lexicon. Decompress the first word
|
|
of that block. */
|
|
|
|
lexnextblk()
|
|
{
|
|
if (++Sccurblk >= Lexnindex || !lexread())
|
|
return (FALSE);
|
|
Lexblkptr = Lexblk + BLOCKSEGS;
|
|
|
|
/* copy in new seed word and return pointer to end */
|
|
|
|
lexstrip(Scendptr = strecpy(Scdecomp, Lexindextab[Sccurblk]));
|
|
return (TRUE);
|
|
}
|