antoine-source/appleworksgs/Spell/Src/TLEX.C
2023-03-04 03:45:20 +01:00

1 line
11 KiB
C
Executable File

/***********************************************************************\
Filename: Tlex.c
\***********************************************************************/
#include <memory.h>
#include "tlex.h"
#include "spmemory.h"
#include "tenviron.h"
#include "string.h"
#include "stdio.h"
#include "th.h"
/* What used to be the Tlex structure - now there's only one, so I just use
vars. */
/* Tlexfree only closes the lexicon file when _SPActive[0] is zero. */
extern int _SPActive[];
/* Lexicon header fields.  Tlexalloc reads 2 * LEXHEADER bytes from the file
straight into memory starting at Tlexenvcode, so these declarations
presumably have to stay in on-disk header order and be laid out
contiguously by the compiler - do not reorder (TODO confirm against
LEXHEADER in the headers). */
short Tlexenvcode; /* lexicon's environment code */
short Tlexscrev; /* revision of sc compatible with lexicon */
short Tlexprod; /* product code (shy, uh, etc.) */
short Tlexsegwords; /* number of words in a segment */
short Tlexnbsd; /* number of entries in BSD table */
short Tlexngram; /* number of entries in di-gram table */
short Tlexnindex; /* number of entries in the index table */
short Tlexnstrings; /* size of the string area in bytes */
/* locations of tables used by decompression routines */
/* After a successful Tlexalloc, Tlexbs and Tlexdelta are pre-decremented by
Tlexminbsd and Tlexgramtab by Tlexmingrm (see Tlexadjptr), so they can be
indexed directly with a raw byte code.  Tlexfree undoes the adjustment
before freeing. */
char *Tlexstrings; /* actual memory area used for the strings */
char *Tlexbs; /* Backspaces for BSD's */
char **Tlexdelta; /* Deltas for BSD's */
char (*Tlexgramtab)[2]; /* di-grams */
char **Tlexindextab; /* block seed words */
/* minimum indices into compressed data tables */
int Tlexminbsd; /* byte code for first BSD */
int Tlexmingrm; /* byte code for first multi-gram */
/* miscellaneous */
HANDLE Tlexfile; /* file access handle */
char Tlexadjptr; /* pointers have been adjusted */
int Tlexoffset; /* block offset to first word of lexicon */
char *Tlexblk; /* location of decompression buffer start */
char *Tlexblkptr; /* current location in buffer */
/* Things I coded in assembly */
/* The two externs that are commented out below (Tlexstrip and Tlexbinnext)
are defined in C in this file instead. */
extern pascal int Tlexcmp();
/*extern pascal void Tlexstrip();*/
extern pascal void Tlexphon();
/*extern pascal int Tlexbinnext();*/
/*
    Detach the flag bytes from the end of a decompressed word.

    eptr points at the terminating null of the word.  The code treats the
    tail of the word as a run of two-byte pairs, each a FLAGSIND marker
    followed by one flag byte.  On return:
      - the word is null-terminated where the first marker used to be;
      - Tscfdecomp holds the non-tag flag bytes, null-terminated, or just
        IW_COMMON when there were no flags (or only a tag);
      - Tsctag holds the last flag byte that had the IW_TAG bit set, or 0.

    NOTE(review): the function looks two bytes before eptr unconditionally,
    so it assumes the word is at least two bytes long - confirm with callers.
*/
VOID Tlexstrip(eptr)
char *eptr; /* Points to the end of the string */
{
    char *out;  /* next free slot in the flag buffer */
    char flag;  /* flag byte currently being classified */

    out = Tscfdecomp;
    Tsctag = 0;

    /* If the word has no flags then return the common flag. */
    eptr -= 2;
    if (ctoi(*eptr) != FLAGSIND)
    {
        *out++ = IW_COMMON;
        *out = 0;
        return;
    }

    /* Step back one pair at a time until the byte is no longer a marker,
       then move forward again onto the first marker. */
    do
    {
        eptr -= 2;
    }
    while (ctoi(*eptr) == FLAGSIND);
    eptr += 2;

    /* Overwrite the first marker to end the word; eptr now points at the
       first flag byte. */
    *eptr++ = 0;

    /* Classify each flag byte, then look at the byte after it: a marker
       (non-zero) means another flag follows, a null ends the run. */
    for (;;)
    {
        flag = *eptr++;
        if (flag & IW_TAG)
            Tsctag = ctoi(flag);
        else
            *out++ = flag;
        if (*eptr++ == 0)
            break;
    }
    *out = 0;

    /* If the word had only a tag, make it common. */
    if (Tscfdecomp[0] == 0)
    {
        *out++ = IW_COMMON;
        *out = 0;
    }
}
/*
Opening the lexicon requires several steps. They are:
2) Open the lexicon file.
3) Read in the header information and verify that the lexicon is
valid for the current language and version of the spelling
components.
4) Allocate memory for and read in (or compute from data already
read) the tables needed to access the lexicon.
*/
Tlexalloc(fname)
char *fname; /* compressed lexicon to be opened */
{
extern char *Tlexgetstr();
char *strings; /* pointer to strings section */
/* Open the lexicon file and skip the copyright message. */
if ((Tlexfile = stdopen(fname, H_RDONLY)) == H_ERROR)
return (FALSE);
if (stdseek((long)COPYRBYTES, Tlexfile))
return (FALSE);
/* Read in the lexicon header and form the LEX structure from it.
Then check that the lexicon's language and the current language
match; and that the lexicon is the right version. */
if (bytread((char *) &Tlexenvcode, 2 * LEXHEADER, Tlexfile) == ERROR)
return (FALSE);
/* Allocate read the backspace table, gram table, and strings. */
if (abytread(&Tlexbs, (unsigned)Tlexnbsd, Tlexfile) == ERROR
|| abytread((char **)&Tlexgramtab, (unsigned)(Tlexngram << 1), Tlexfile)
== ERROR || abytread(&Tlexstrings, (unsigned)Tlexnstrings, Tlexfile) == ERROR)
return (FALSE);
/* Allocate the delta and index tables and initialize them with
pointers into the string table. */
if (!(strings = Tlexgetstr(Tlexstrings, &Tlexdelta, Tlexnbsd))
|| !Tlexgetstr(strings, &Tlexindextab, Tlexnindex))
return (FALSE);
/* determine offset in lexicon file of first byte of word data; this
is the total size of the header and its tables, rounded up to the
nearest logical block boundary. This code only works if
MM_BLKSIZE is a power of 2. */
Tlexoffset = (((COPYRBYTES + 2 * LEXHEADER - 1)
+ Tlexnbsd + (Tlexngram << 1) + Tlexnstrings)
& -MM_BLKSIZE) / MM_BLKSIZE + 1;
/* to determine which ranges of byte will acquire the meaning "BSD
byte" and "di-gram byte", specify the minimum value for each of
these ranges */
Tlexminbsd = NUMBACK + NPHON;
Tlexmingrm = Tlexminbsd + Tlexnbsd;
/* save an unnecessary subtraction during decompression by
pre-decrementing the BSD and di-gram pointers. */
Tlexadjptr = TRUE;
Tlexbs -= Tlexminbsd;
Tlexdelta -= Tlexminbsd;
Tlexgramtab -= Tlexmingrm;
return(TRUE);
}
/* Build a table of pointers to consecutive null-terminated strings.

   sp  - first string in the string area
   asp - receives the address of the newly allocated pointer table
   len - number of strings (and therefore table entries)

   The table is obtained from zalloc under _THID.  Returns the address just
   past the terminating null of the last string visited, or NULL if the
   allocation fails (in which case *asp is left untouched). */
STATIC char *
Tlexgetstr(sp, asp, len)
char *sp; /* Points into string table. */
char ***asp; /* Where to put allocated array. */
int len; /* Number of pointers. */
{
    char **slot; /* next table entry to fill */

    slot = (char **) zalloc(_THID, len * sizeof(char *));
    if (slot == NULL)
        return (NULL);
    *asp = slot;

    /* Record the start of each string, then step over it and its null. */
    for (; len > 0; len--)
    {
        *slot++ = sp;
        while (*sp)
            sp++;
        sp++;
    }
    return (sp);
}
/* Fetch lexicon block Tsccurblk (biased by Tlexoffset, the block offset of
   the first word data) through memread and remember its address in
   Tlexblk.  Returns TRUE if memread supplied a buffer, FALSE if it
   returned NULL. */
STATIC int
Tlexread()
{
    extern char *memread();

    Tlexblk = memread(Tsccurblk + Tlexoffset, Tlexfile, MM_READ | MM_PR2);
    return ((Tlexblk != NULL) ? TRUE : FALSE);
}
/* Free the lexicon stuff; close the file. */
VOID Tlexfree()
{
if (Tlexadjptr)
{
Tlexgramtab += Tlexmingrm;
Tlexbs += Tlexminbsd;
Tlexdelta += Tlexminbsd;
}
if (Tlexfile && !_SPActive[0])
stdclose(Tlexfile);
nzfree(Tlexstrings);
nzfree(Tlexindextab);
nzfree(Tlexgramtab);
nzfree(Tlexbs);
nzfree(Tlexdelta);
}
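/* This function is passed the end of a word that has flags in it. These
flags are stripped off and stored in the Sc vars. */
/* Once coded in assembly (RAH); that extern is now commented out and the
routine in use is the C function Tlexstrip defined earlier in this file. */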
/* Compare a word with a word that may have flags on the end. The second
argument is the one that may have the flags. */
/* Coded in assembly. (RAH) */
/* Binary-search the index table for the block in which word would be found
   if it occurs in the lexicon, and store that block number in Tsccurblk.

   The search keeps the last index entry that compares less than or equal
   to word (per Tlexcmp); the midpoint is rounded up so the loop always
   makes progress when the lower bound is kept.  Nothing is returned. */
void Tlexblknum(word)
char *word;
{
    int lo;  /* lowest candidate block */
    int hi;  /* highest candidate block */
    int mid; /* index table entry to check */

    lo = 0;
    hi = Tlexnindex - 1;
    while (lo < hi)
    {
        mid = (hi + lo + 1) >> 1;
        if (Tlexcmp(word, Tlexindextab[mid]) < 0)
            hi = mid - 1;
        else
            lo = mid;
    }
    Tsccurblk = hi;
}
/* Decompress (to) the phonetic part of a word. This is called for a segment
word or after lexbinnext has processed the bs or bsd before the phonetic
characters. When this routine exits, the decompression pointer will
point to the bs or bsd of the next word or to the null at the end of the
lexicon block. */
/* Coded in assembly. (RAH) */
/* Try to find word in the lexicon. Return the flags as found in the
lex. If the word is not found the the first character of the flags will
be null. */
Tlexword(word)
char *word; /* The word to look up */
{
char peword[2*MAXWORD]; /* phonetically encoded form */
/* word must fit in LONGWORD */
if (strlen((char *)word) >= LONGWORD)
{
return;
}
/* Look up the word in the lexicon. */
phfull(word, peword, Tlexprod & SHORTLEX);
return(Tlexfetch(peword));
}
/* Look up the flagged form of word in the lexicon.

   peword is the phonetically encoded query.  The function picks the block
   with Tlexblknum, reads it with Tlexread, chooses the segment to start in,
   and then steps through words with Tlexbinnext until the decompressed word
   in Tscdecomp is no longer less than the query.

   Returns FALSE if the block cannot be read or Tlexbinnext fails; otherwise
   returns non-zero exactly when the word reached compares equal to peword.

   NOTE(review): Tlexphon is an assembly routine not visible here; it
   presumably decompresses from Tlexblkptr into the buffer at Tscendptr -
   confirm. */
Tlexfetch(peword)
char *peword; /* The word to seek to */
{
int x; /* result of the last strcmp against Tscdecomp */
char *blkptr; /* walks the per-segment bytes at the start of the block */
int seg; /* number of segment bytes consumed before the loop ended */
char *currptr; /* where the sequential scan will start */
char *nextptr; /* start of the segment following the current one */
/* Point to the block that the word will be in. */
Tlexblknum(peword);
/* Read the block. */
if (!Tlexread())
return (FALSE);
/* Find the segment which contains the query word. To do so, look
through the segment head words to find one larger than the
query. When that one is found, stop. The loop also stops after
BLOCKSEGS entries or at a zero segment byte. */
blkptr = nextptr = Tlexblk;
currptr = blkptr + BLOCKSEGS;
for (seg = 0; seg < BLOCKSEGS && *blkptr; ++seg)
{
/* Make nextptr point to the start of the next segment: advance it by
Tlexsegwords plus the current segment byte. */
Tlexblkptr = nextptr += Tlexsegwords + ctoi(*blkptr++);
/* Decompress the first word of that segment. */
Tscendptr = Tscdecomp;
Tlexphon();
/* Stop when the query word is less than or equal to the segment word. */
if (strcmp(peword, Tscdecomp) <= 0)
break;
currptr = nextptr;
}
/* Currptr now points to the offset to start searching. */
Tlexblkptr = currptr;
/* If this is the first segment, initialize the decompression with
the index word for the block; otherwise, initialize it by getting
the first word from the segment. */
if (!seg)
{
Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]);
Tlexstrip(Tscendptr);
}
else
{
Tscendptr = Tscdecomp;
Tlexphon();
}
/* Scan the segment until a word equal to or greater than
the desired word is found. */
while ((x = strcmp(peword, Tscdecomp)) > 0)
if (!Tlexbinnext())
return (FALSE);
/* x is 0 on an exact match, negative if the scan passed the query. */
return (!x);
}
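/* Decompress the next word from the lexicon. */
/* Once coded in assembly (RAH); that extern is now commented out and the
routine in use is the C function Tlexbinnext defined below. */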
/* Advance to the next lexicon block and prime decompression with its seed
   word.

   Tsccurblk is incremented first (and stays incremented even on failure).
   Returns FALSE when that runs past the last index entry or the block
   cannot be read.  Otherwise the read position is set just past the
   BLOCKSEGS bytes at the start of the block, the block's index word is
   copied into Tscdecomp, its flags are split off with Tlexstrip, and TRUE
   is returned. */
Tlexnextblk()
{
    ++Tsccurblk;
    if (Tsccurblk >= Tlexnindex)
        return (FALSE);
    if (!Tlexread())
        return (FALSE);

    Tlexblkptr = Tlexblk + BLOCKSEGS;

    /* copy in new seed word; strecpy hands back the end of the copy */
    Tscendptr = strecpy(Tscdecomp, Tlexindextab[Tsccurblk]);
    Tlexstrip(Tscendptr);
    return (TRUE);
}
Tlexbinnext()
{
int cc;
char *blkptr;
/* If at the end of the current block, read the next. */
blkptr = Tlexblkptr;
if ((cc = ctoi(*blkptr++)) == 0)
return (Tlexnextblk());
/* Apply the backspace or bsd. */
if (cc < Tlexminbsd)
{
Tscendptr -= cc - NPHON;
/* look for dual backspace sequence */
if (cc == NPHON + (NUMBACK - 1))
Tscendptr -= ctoi(*blkptr++) - NPHON;
}
else
Tscendptr = strecpy(Tscendptr-Tlexbs[cc], Tlexdelta[cc]);
Tlexblkptr = blkptr;
/* Get the phonetic part for the word. */
Tlexphon();
return (TRUE);
}