mirror of
https://github.com/antoinevignau/source.git
synced 2025-01-07 22:32:55 +00:00
1 line
11 KiB
C
Executable File
1 line
11 KiB
C
Executable File
/***********************************************************************\
|
|
|
|
Filename: Tlex.c
|
|
|
|
\***********************************************************************/
|
|
|
|
#include <memory.h>
|
|
#include "tlex.h"
|
|
#include "spmemory.h"
|
|
#include "tenviron.h"
|
|
#include "string.h"
|
|
#include "stdio.h"
|
|
#include "th.h"
|
|
|
|
/* What used to be the Tlex structure - now there's only one, so I just use
|
|
vars. */
|
|
|
|
extern int _SPActive[];
|
|
|
|
short Tlexenvcode; /* lexicon's environment code */
|
|
short Tlexscrev; /* revision of sc compatible with lexicon */
|
|
short Tlexprod; /* product code (shy, uh, etc.) */
|
|
short Tlexsegwords; /* number of words in a segment */
|
|
short Tlexnbsd; /* number of entries in BSD table */
|
|
short Tlexngram; /* number of entries in di-gram table */
|
|
short Tlexnindex; /* number of entries in the index table */
|
|
short Tlexnstrings; /* size of the string area in bytes */
|
|
|
|
/* locations of tables used by decompression routines */
|
|
|
|
char *Tlexstrings; /* actual memory area used for the strings */
|
|
char *Tlexbs; /* Backspaces for BSD's */
|
|
char **Tlexdelta; /* Deltas for BSD's */
|
|
char (*Tlexgramtab)[2]; /* di-grams */
|
|
char **Tlexindextab; /* block seed words */
|
|
|
|
/* minimum indices into compressed data tables */
|
|
|
|
int Tlexminbsd; /* byte code for first BSD */
|
|
int Tlexmingrm; /* byte code for first multi-gram */
|
|
|
|
/* miscellaneous */
|
|
|
|
|
|
HANDLE Tlexfile; /* file access handle */
|
|
char Tlexadjptr; /* pointers have been adjusted */
|
|
int Tlexoffset; /* block offset to first word of lexicon */
|
|
char *Tlexblk; /* location of decompression buffer start */
|
|
char *Tlexblkptr; /* current location in buffer */
|
|
|
|
/* Things I coded in assembly */
|
|
|
|
extern pascal int Tlexcmp();
|
|
/*extern pascal void Tlexstrip();*/
|
|
extern pascal void Tlexphon();
|
|
/*extern pascal int Tlexbinnext();*/
|
|
|
|
VOID Tlexstrip(eptr)
|
|
char *eptr; /* Points to the end of the string */
|
|
{
|
|
char *fptr; /* Points to the output buffer */
|
|
|
|
eptr -= 2;
|
|
fptr = Tscfdecomp;
|
|
Tsctag = 0;
|
|
|
|
/* If the word has no flags the return common flags */
|
|
|
|
if (ctoi(*eptr) != FLAGSIND)
|
|
{
|
|
*fptr++ = IW_COMMON;
|
|
*fptr = 0;
|
|
return;
|
|
}
|
|
|
|
/* Find the start of the flags */
|
|
|
|
while (ctoi(*(eptr -= 2)) == FLAGSIND)
|
|
;
|
|
eptr += 2;
|
|
|
|
/* Detach the flag from the word. */
|
|
|
|
*eptr++ = 0;
|
|
|
|
/* Move the flags to the flag buffer; move any tag to the tag
|
|
value. */
|
|
|
|
do
|
|
{
|
|
if (*eptr & IW_TAG)
|
|
Tsctag = ctoi(*eptr++);
|
|
else
|
|
*fptr++ = *eptr++;
|
|
}
|
|
while (*eptr++);
|
|
*fptr = 0;
|
|
|
|
/* If the word had only a tag, make it common. */
|
|
|
|
if (Tscfdecomp[0] == 0)
|
|
{
|
|
*fptr++ = IW_COMMON;
|
|
*fptr = 0;
|
|
}
|
|
}
|
|
|
|
/*
|
|
Opening the lexicon requires several steps. They are:
|
|
|
|
2) Open the lexicon file.
|
|
|
|
3) Read in the header information and verify that the lexicon is
|
|
valid for the current language and version of the spelling
|
|
components.
|
|
|
|
4) Allocate memory for and read in (or compute from data already
|
|
read) the tables needed to access the lexicon.
|
|
*/
|
|
|
|
Tlexalloc(fname)
|
|
char *fname; /* compressed lexicon to be opened */
|
|
{
|
|
extern char *Tlexgetstr();
|
|
char *strings; /* pointer to strings section */
|
|
|
|
/* Open the lexicon file and skip the copyright message. */
|
|
|
|
if ((Tlexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
|
|
return (FALSE);
|
|
|
|
if (stdseek((long)COPYRBYTES, Tlexfile))
|
|
return (FALSE);
|
|
|
|
/* Read in the lexicon header and form the LEX structure from it.
|
|
Then check that the lexicon's language and the current language
|
|
match; and that the lexicon is the right version. */
|
|
|
|
if (bytread((char *) &Tlexenvcode, 2 * LEXHEADER, Tlexfile) == ERROR)
|
|
return (FALSE);
|
|
|
|
/* Allocate read the backspace table, gram table, and strings. */
|
|
|
|
if (abytread(&Tlexbs, (unsigned)Tlexnbsd, Tlexfile) == ERROR
|
|
|| abytread((char **)&Tlexgramtab, (unsigned)(Tlexngram << 1), Tlexfile)
|
|
== ERROR || abytread(&Tlexstrings, (unsigned)Tlexnstrings, Tlexfile) == ERROR)
|
|
return (FALSE);
|
|
|
|
/* Allocate the delta and index tables and initialize them with
|
|
pointers into the string table. */
|
|
|
|
if (!(strings = Tlexgetstr(Tlexstrings, &Tlexdelta, Tlexnbsd))
|
|
|| !Tlexgetstr(strings, &Tlexindextab, Tlexnindex))
|
|
return (FALSE);
|
|
|
|
/* determine offset in lexicon file of first byte of word data; this
|
|
is the total size of the header and its tables, rounded up to the
|
|
nearest logical block boundary. This code only works if
|
|
MM_BLKSIZE is a power of 2. */
|
|
|
|
Tlexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
|
|
+ Tlexnbsd + (Tlexngram << 1) + Tlexnstrings)
|
|
& -MM_BLKSIZE) / MM_BLKSIZE + 1;
|
|
|
|
/* to determine which ranges of byte will acquire the meaning "BSD
|
|
byte" and "di-gram byte", specify the minimum value for each of
|
|
these ranges */
|
|
|
|
Tlexminbsd = NUMBACK + NPHON;
|
|
Tlexmingrm = Tlexminbsd + Tlexnbsd;
|
|
|
|
/* save an unnecessary subtraction during decompression by
|
|
pre-decrementing the BSD and di-gram pointers. */
|
|
|
|
Tlexadjptr = TRUE;
|
|
Tlexbs -= Tlexminbsd;
|
|
Tlexdelta -= Tlexminbsd;
|
|
Tlexgramtab -= Tlexmingrm;
|
|
|
|
return(TRUE);
|
|
}
|
|
|
|
/* Function to assign string pointers for strings read from the lexicon. */
|
|
|
|
STATIC char *
|
|
Tlexgetstr(sp, asp, len)
|
|
char *sp; /* Points into string table. */
|
|
char ***asp; /* Where to put allocated array. */
|
|
int len; /* Number of pointers. */
|
|
{
|
|
char **tp;
|
|
|
|
/* Allocate the space for the tables. */
|
|
|
|
if (!(tp = (char **) zalloc(_THID, len * sizeof(char *))))
|
|
return (NULL);
|
|
|
|
*asp = tp; /* Return the table address. */
|
|
|
|
/* Set the pointer address. */
|
|
|
|
while (--len >= 0)
|
|
{
|
|
*tp++ = sp;
|
|
while (*sp++)
|
|
;
|
|
}
|
|
|
|
return (sp); /* Return the next string address. */
|
|
}
|
|
|
|
/* Read a lexicon block from the current lexicon. */
|
|
|
|
STATIC int
|
|
Tlexread()
|
|
{
|
|
extern char *memread();
|
|
|
|
if ((Tlexblk = memread(Tsccurblk + Tlexoffset, Tlexfile, MM_READ | MM_PR2)) ==
|
|
NULL)
|
|
{
|
|
return (FALSE);
|
|
}
|
|
return (TRUE);
|
|
}
|
|
|
|
/* Free the lexicon stuff; close the file. */
|
|
|
|
VOID Tlexfree()
|
|
{
|
|
if (Tlexadjptr)
|
|
{
|
|
Tlexgramtab += Tlexmingrm;
|
|
Tlexbs += Tlexminbsd;
|
|
Tlexdelta += Tlexminbsd;
|
|
}
|
|
if (Tlexfile && !_SPActive[0])
|
|
stdclose(Tlexfile);
|
|
nzfree(Tlexstrings);
|
|
nzfree(Tlexindextab);
|
|
nzfree(Tlexgramtab);
|
|
nzfree(Tlexbs);
|
|
nzfree(Tlexdelta);
|
|
}
|
|
|
|
/* This function is passed the end of a word that has flags in it. These
|
|
flags are stripped off and stored in the Sc vars. */
|
|
|
|
/* No longer coded in assembly: the C implementation is Tlexstrip(), defined earlier in this file. */
|
|
|
|
/* Compare a word with a word that may have flags on the end. The second
|
|
argument is the one that may have the flags. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Binary-search the index table to find the block where word can be found
|
|
if it occurs in the lexicon. Return the resulting block number. */
|
|
|
|
void Tlexblknum(word)
|
|
char *word;
|
|
{
|
|
int maxval; /* binary search limits */
|
|
int minval; /* binary search limits */
|
|
int i; /* index table entry to check */
|
|
|
|
minval = 0;
|
|
maxval = Tlexnindex - 1;
|
|
while (minval < maxval)
|
|
{
|
|
i = (maxval + minval + 1) >> 1;
|
|
if (Tlexcmp(word, Tlexindextab[i]) >= 0)
|
|
minval = i;
|
|
else
|
|
maxval = i - 1;
|
|
}
|
|
Tsccurblk = maxval;
|
|
}
|
|
|
|
/* Decompress (to) the phonetic part of a word. This is called for a segment
|
|
word or after lexbinnext has processed the bs or bsd before the phonetic
|
|
characters. When this routine exits, the decompression pointer will
|
|
point to the bs or bsd of the next word or to the null at the end of the
|
|
lexicon block. */
|
|
|
|
/* Coded in assembly. (RAH) */
|
|
|
|
/* Try to find word in the lexicon. Return the flags as found in the
|
|
lex. If the word is not found the the first character of the flags will
|
|
be null. */
|
|
|
|
Tlexword(word)
|
|
char *word; /* The word to look up */
|
|
{
|
|
char peword[2*MAXWORD]; /* phonetically encoded form */
|
|
|
|
/* word must fit in LONGWORD */
|
|
|
|
if (strlen((char *)word) >= LONGWORD)
|
|
{
|
|
return;
|
|
}
|
|
|
|
/* Look up the word in the lexicon. */
|
|
|
|
phfull(word, peword, Tlexprod & SHORTLEX);
|
|
|
|
return(Tlexfetch(peword));
|
|
}
|
|
|
|
/* Look up the flagged form of word in the lexicon. */
|
|
|
|
Tlexfetch(peword)
|
|
char *peword; /* The word to seek to */
|
|
{
|
|
int x;
|
|
char *blkptr;
|
|
int seg;
|
|
char *currptr;
|
|
char *nextptr;
|
|
|
|
/* Point to the block that the word will be in. */
|
|
|
|
Tlexblknum(peword);
|
|
|
|
/* Read the block. */
|
|
|
|
if (!Tlexread())
|
|
return (FALSE);
|
|
|
|
/* Find the segment which contains the query word. To do so, look
|
|
through the segment head words to find one larger than the
|
|
query. When that one is found, stop. */
|
|
|
|
blkptr = nextptr = Tlexblk;
|
|
currptr = blkptr + BLOCKSEGS;
|
|
for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
|
|
{
|
|
/* Make nextptr point to the start of the next segment. */
|
|
|
|
Tlexblkptr = nextptr += Tlexsegwords + ctoi(*blkptr++);
|
|
|
|
/* Decompress the first word of the segment. */
|
|
|
|
Tscendptr = Tscdecomp;
|
|
Tlexphon();
|
|
|
|
/* Stop when the query word is less than the segment word. */
|
|
|
|
if (strcmp(peword, Tscdecomp) <= 0)
|
|
break;
|
|
currptr = nextptr;
|
|
}
|
|
|
|
/* Currptr now points to the offset to start searching. */
|
|
|
|
Tlexblkptr = currptr;
|
|
|
|
/* If this is the first segment, initialize the decompression with
|
|
the index word for the block; otherwise, initialize it by getting
|
|
the first word from the segment. */
|
|
|
|
if (!seg)
|
|
{
|
|
Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]);
|
|
Tlexstrip(Tscendptr);
|
|
}
|
|
else
|
|
{
|
|
Tscendptr = Tscdecomp;
|
|
Tlexphon();
|
|
}
|
|
|
|
/* Scan the segment until a word equal to or greater than
|
|
the desired word is found. */
|
|
|
|
while ((x = strcmp(peword, Tscdecomp)) > 0)
|
|
if (!Tlexbinnext())
|
|
return (FALSE);
|
|
return (!x);
|
|
}
|
|
|
|
/* Decompress the next word from the lexicon. */
|
|
|
|
/* Formerly coded in assembly (RAH); the C implementation is Tlexbinnext(), defined later in this file. */
|
|
|
|
/* Find and read the next block of the lexicon. Decompress the first word
|
|
of that block. */
|
|
|
|
Tlexnextblk()
|
|
{
|
|
if (++Tsccurblk >= Tlexnindex || !Tlexread())
|
|
return (FALSE);
|
|
Tlexblkptr = Tlexblk + BLOCKSEGS;
|
|
|
|
/* copy in new seed word and return pointer to end */
|
|
|
|
Tlexstrip(Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]));
|
|
return (TRUE);
|
|
}
|
|
|
|
Tlexbinnext()
|
|
{
|
|
int cc;
|
|
char *blkptr;
|
|
|
|
/* If at the end of the current block, read the next. */
|
|
|
|
blkptr = Tlexblkptr;
|
|
if ((cc = ctoi(*blkptr++)) == 0)
|
|
return (Tlexnextblk());
|
|
|
|
/* Apply the backspace or bsd. */
|
|
|
|
if (cc < Tlexminbsd)
|
|
{
|
|
Tscendptr -= cc - NPHON;
|
|
|
|
/* look for dual backspace sequence */
|
|
|
|
if (cc == NPHON + (NUMBACK - 1))
|
|
Tscendptr -= ctoi(*blkptr++) - NPHON;
|
|
}
|
|
else
|
|
Tscendptr = strecpy(Tscendptr-Tlexbs[cc], Tlexdelta[cc]);
|
|
Tlexblkptr = blkptr;
|
|
|
|
/* Get the phonetic part for the word. */
|
|
|
|
Tlexphon();
|
|
|
|
return (TRUE);
|
|
}
|