2023-03-04 03:45:20 +01:00

1 line
37 KiB
C
Executable File

/***********************************************************************\
Filename: thes.c
\***********************************************************************/
#include <string.h>
#include "proxio.h"
#include "tlex.h"
#include "tenviron.h"
#include "scdef.h"
#include "thesmisc.h"
#include "thes.h"
#include "spmemory.h"
#include "th.h"
/* Forward declarations of the file-local helpers (K&R style, without
parameter lists). Those declared without a type return int. */
static thsinit();
static thesfetch();
static savemean();
static savedir();
static skipmean();
static findcode();
static theshuff();
static long binloc();
static getmean();
static setqflags();
static nextlist();
static getshtab();
static END *getstage();
static isinf();
static getstrtab();
static delunf();
static badunf();
static goodpos();
/* Text written between alternative forms separated by INFSEP
(see setqflags). */
#define INFALT " or "
/* Unit used to size the scratch buffer in getmean(). */
#define MAXSTR 256
/* Decoded character values at or above HUFFCH select an entry of
Theshuffind instead of being stored literally (see theshuff). */
#define HUFFCH 129
/* Values passed as the "skip" argument of getmean(): thesinfo() passes
MEAN and savedir() passes INIT. DEFS is not used in the code visible
here. */
#define MEAN 0
#define INIT 1
#define DEFS 2
/* The Thes* / Thq* / Thm* globals below provide access to all the
thesaurus query information and the results of a lookup.
NOTE(review): thesopen() fills the header values with one thintmv()
call that starts at &Thesprod and covers THESHEAD values, so it
presumably relies on these shorts being laid out contiguously in
declaration order - confirm before reordering. */
short Thesprod; /* product type */
short Theswrev; /* word list type used */
short Thesnibin; /* size of ibin index table */
short Thesniadd; /* size of ioverflow add table */
short Thesnmbin; /* size of mbin index table */
short Thesnmadd; /* size of moverflow add table */
short Theslcode; /* length of longest huffman code */
short Thesnchars; /* number of entries in Thesxlate */
short Thesstrsiz; /* max. length of the lists */
short Thesmaxdef; /* length of longest definition */
short Thesmaxhash; /* max. number of words in the lists */
short Thesnumstr; /* no. of encoded words in the definitions */
short Thestotstr; /* number of bytes allocated for Theshuffstr */
short Thesdirfld; /* length of the direct hashes */
short Thesposfld; /* length of the POS field */
char *Theshuffstr; /* storage for the encoded words (see getstrtab) */
char **Theshuffind; /* pointers to each encoded word in Theshuffstr */
/* thesaurus tables */
unsigned *Thesibin; /* thesaurus ibin index tables */
unsigned *Thesiadd; /* ioverflow add table */
unsigned *Thesmbin; /* thesaurus mbin index tables */
unsigned *Thesmadd; /* moverflow add table */
unsigned *Thescnttab; /* counts for each huffman code */
char *Thesxlate; /* huffman translation table */
long Thesidata; /* offset to indirect table data */
long Thesmdata; /* offset to meaning data */
/* thesaurus information and control fields */
long Thqcontrol; /* thesaurus access control */
long Thqdesc; /* hash value for the query */
long Thqcuraddr; /* current address */
int Thqnstrarr; /* string array size */
int Thqtblind; /* index to the descriptor table */
int Thqendtab; /* next free slot in Thqindtab (see findcode) */
short Thqstate; /* thesaurus access state */
char Thqcount; /* count of meanings in Thqaddr */
char Thqspecial; /* if the query is a special */
char Thqphrase; /* if the query is a phrase */
char Thqquery[LONGWORD]; /* query word, after doflags */
char Thqunfquery[LONGWORD]; /* unflected word(s) */
char *Thqmultunf[MAXUNF]; /* pointer(s) to unflected word(s) */
short Thqunfindex[MAXUNF]; /* index of unflected meanings */
short Thqunfpos[MAXUNF]; /* part of speech from unflection */
short Thqnumdesc[MAXUNF]; /* end index in Thqindtab of the descriptors
fetched for the query ([0]) and for each
unflection (see thesword, savemean) */
long Thqworddesc[MAXUNF]; /* hash value for the unflections */
END *Thqstage[MAXUNF]; /* unflection stage entries */
int Thqindtab[MAXMEAN * 2]; /* indirect pointer table */
long Thqaddr[MAXMEAN]; /* address of the meanings */
/* memory to hold one meaning of a word looked up in thesaurus */
char Thmpos; /* part of speech number */
char Thmmeanno; /* meaning number in thesaurus */
char Thmcount[MAXLISTS]; /* count for each list */
char **Thmlist[MAXLISTS]; /* headers to NULL terminated lists */
char **Thmstrarr; /* pointers to the words in the lists */
char *Thmstrings; /* thesaurus strings */
char *Thmstrend; /* next free position while strings are built */
char *Thmsentinel; /* Thmstrings + strsize; thesinfo() stops here */
int Thmhstart[MAXLISTS]; /* first index in Thmhashes for each list */
long *Thmhashes; /* pointer to hashes */
static int Qflag; /* flags returned by doflags() for the query */
static char *Unfquery; /* unflected form currently being processed */
static END *Stageptr; /* unflection stage currently being processed */
/* Open the thesaurus stored in the lexicon file (Tlexfile).
   Reads the unflection exceptions, the thesaurus header, the four
   bin/overflow tables (tables of 16-bit values, low byte first), the
   huffman count and translation tables and the encoded-word table,
   then allocates the work areas for strings, string pointers and
   hashes and computes the offsets of the indirect and meaning data.

   Returns TRUE on success and FALSE on failure.  Every failure after
   the unflection data has been opened releases what was acquired via
   thesclose(), including the final work-area allocation (which the
   previous version leaked). */
thesopen()
{
    long binloc();
    long thesbit();
    void thesclose();
    char *strptr;
    int i;
    HANDLE fptr;
    int tabsize;
    int strsize;
    int maxhash;
    long offset;
    long unfoff;
    char str[sizeof(unsigned) * THESHEAD];

    offset = (long)(Tlexoffset + Tlexnindex) * MM_BLKSIZE;
    fptr = Tlexfile;
    if (thesblk(fptr, offset, 1) == ERROR)
        return (FALSE);

    /* Read in the unflection exceptions. */
    if (!(unfoff = unfopen(fptr)))
    {
        thesclose();
        return (FALSE);
    }
    offset += unfoff;

    /* Read the raw header bytes and convert them into the header
       shorts, starting at Thesprod. */
    i = THESHEAD * sizeof(short);
    strptr = (char *) str;
    while (--i >= 0)
        *strptr++ = thesbit(fptr, 8);
    thintmv((char *) str, &Thesprod, THESHEAD);

    /* Read the thesaurus bin tables. */
    if (!getshtab(fptr, &Thesibin, Thesnibin)
        || !getshtab(fptr, &Thesiadd, Thesniadd)
        || !getshtab(fptr, &Thesmbin, Thesnmbin)
        || !getshtab(fptr, &Thesmadd, Thesnmadd))
    {
        thesclose();
        return (FALSE);
    }

    /* Huffman count table: one byte per code length, kept shifted
       left by 8 bits. */
    tabsize = Theslcode * sizeof(unsigned);
    if ((Thescnttab = (unsigned *) zalloc(_THID, (unsigned)tabsize)) == NULL)
    {
        thesclose();
        return (FALSE);
    }
    i = 0;
    while (i < Theslcode)
        Thescnttab[i++] = thesbit(fptr, 8) << 8;
    /* Adjust the value for the last table entry. */
    Thescnttab[Theslcode - 1] += 0x100;

    /* Huffman translation table. */
    tabsize = Thesnchars;
    if ((Thesxlate = (char *) zalloc(_THID, (unsigned)tabsize)) == NULL)
    {
        thesclose();
        return (FALSE);
    }
    strptr = Thesxlate;
    i = Thesnchars;
    while (--i >= 0)
        *strptr++ = thesbit(fptr,8);

    /* Account for everything read so far: header, count table,
       translation table and the four tables of 2-byte entries. */
    offset += THESHEAD * sizeof(short) + Theslcode + Thesnchars
        + 2 * (Thesnibin + Thesniadd
        + Thesnmbin + Thesnmadd);

    /* Adjust last values in the bins. */
    Thesnibin--;
    Thesnmbin--;

    if ((Theshuffstr =
        (char *) zalloc(_THID, (unsigned)Thestotstr)) == NULL
        || (Theshuffind = (char **) zalloc(_THID, (unsigned)
        (sizeof(char *) * Thesnumstr))) == NULL)
    {
        thesclose();
        return (FALSE);
    }

    /* Read in the encoded words (used by theshuff). */
    offset += getstrtab(fptr);

    /* Save the offset to start of thesaurus indirect bins. */
    Thesidata = offset;

    /* Determine offset of meaning data. */
    Thesmdata = binloc(Thesnibin, Thesidata,
        Thesibin, Thesiadd, Thesniadd);

    strsize = Thesstrsiz + Thesmaxdef;
    maxhash = Thesmaxhash + 1;

    /* Allocate space for the strings, the pointers
       to the strings and the hash table. */
    if ((Thmstrarr = (char **) zalloc(_THID, (unsigned)(sizeof (char *) *
        (maxhash + MAXLISTS)))) == NULL || (Thmstrings
        = (char *) zalloc(_THID, (unsigned)(strsize + MAXWORD))) == NULL
        || (Thmhashes =
        (long *) zalloc(_THID, (unsigned)(sizeof(long) * maxhash))) == NULL)
    {
        /* Release the tables acquired above, exactly as the other
           failure paths do. */
        thesclose();
        return (FALSE);
    }

    Thqnstrarr = maxhash + MAXLISTS;
    Thmsentinel = Thmstrings + strsize;
    Thqstate = TH_INIT;
    return (TRUE);
}
/* Close an open thesaurus. */
void thesclose()
{
unfclose();
nzfree((char *) Thmstrarr);
nzfree((char *) Thmstrings);
nzfree((char *) Thmhashes);
nzfree((char *) Thesxlate);
nzfree((char *) Thescnttab);
nzfree((char *) Thesmadd);
nzfree((char *) Thesiadd);
nzfree((char *) Thesibin);
nzfree((char *) Thesmbin);
nzfree((char *) Theshuffstr);
nzfree((char *) Theshuffind);
}
/* thesword() is the main interface to the thesaurus lookup.
Given "query", it collects the addresses of up to 40 meanings (first
for the query itself, then for its unflected forms) in Thqaddr[] and
leaves the lookup state in the Thq* globals for later calls to
thesinfo().
The returned value is the number of meanings saved (meanreq - nummean),
or FALSE if the query is too long (MAXWORD or more characters), if
doflags() reports ERROR, or if thsinit() rejects the query.
Thqstate is forced to TH_INIT before the switch, so execution always
enters at the TH_INIT case and deliberately falls through the later
cases. */
thesword(query)
char *query;
{
long theswtoh();
char *strptr;
HANDLE fptr;
/* index is static but is reset to 0 before each use below. */
static int index;
/* NOTE(review): allpos is passed to thsinit() by address and later
handed to badunf(); confirm thsinit() stores a value through it. */
int allpos;
short meanreq;
char tmpstr[LONGWORD];
/* Number of meanings still wanted; savemean() counts it down. */
int nummean = 40;
if (strlen(query) >= MAXWORD)
return (FALSE);
meanreq = 40;
/* Normalise the query into tmpstr and remember its flags. */
if ((Qflag = doflags((char *) query, tmpstr, MAXWORD)) == ERROR)
return (FALSE);
strecpy(Thqquery, tmpstr);
Thqstate = TH_INIT;
Thqspecial = Qflag & IW_SPECIAL;
switch (Thqstate)
{
case TH_INIT:
if (!thsinit((char *) query, &allpos))
return (FALSE);
/* fall through */
case TH_MORE:
/* Save the meanings of the query word itself (descriptor group 0). */
savemean(0, &nummean);
if (Thqstate == TH_DONE || !nummean)
return (meanreq - nummean);
else
{
/* Meanings from Thqunfindex[0] onwards belong to unflections. */
Thqunfindex[0] = Thqcount;
Thqstate = TH_UNFL;
}
/* Try the unflected form of the word. */
strecpy(Thqunfquery, Thqquery);
if (!unflect(Thqunfquery, Thqmultunf, Thqstage))
{
Thqstate = TH_DONE;
return (meanreq - nummean);
}
/* For a special phrase, re-flag the unflected text and keep only the
first unflection. */
if (Thqspecial && Thqphrase && doflags(Thqunfquery,
tmpstr, MAXWORD) != ERROR)
{
strecpy(Thqunfquery, tmpstr);
Thqmultunf[1] = NULL;
}
index = 0;
Stageptr = Thqstage[index];
/**************************************************************
* Remove incorrect (or unwanted) unflections. *
* An unflected form is to be discarded if: *
* 1. it is not in the lexicon (an unlikely occurrence). *
* 2. the type of unflection is not desired. *
* 3. it has no meanings in the thesaurus. *
**************************************************************/
while ((Unfquery = Thqmultunf[index]) != NULL)
{
/* NOTE(review): Stageptr is dereferenced without a NULL check; this
assumes unflect() fills Thqstage[] for every entry of Thqmultunf[]. */
Thqunfpos[index] = Stageptr->en_pos;
if ((Thqworddesc[index]
= theswtoh((char *) Unfquery)) < 0
|| badunf(index, allpos)
|| thesfetch(Thqworddesc[index]) == 0)
delunf(index);
else
/* Record where this unflection's descriptors end in Thqindtab. */
Thqnumdesc[++index] = Thqendtab;
Stageptr = Thqstage[index];
}
/* If the word is a phrase & a special, Thqunfquery must
be cleaned up to remove the flagged form. */
if (Thqphrase && Thqspecial)
{
strptr = Thqunfquery;
while (*strptr)
/* Keep only the text that follows the first character below ' '. */
if (*strptr++ < ' ')
{
strecpy(tmpstr, strptr);
strecpy(Thqunfquery, tmpstr);
break;
}
}
index = 0;
Unfquery = Thqmultunf[index];
/* fall through */
case TH_UNFL:
/* Save the meanings of each surviving unflection in turn. */
while ((Stageptr = Thqstage[index]) != NULL)
{
/* Once some unflected meanings have been saved, drop later VB_NN and
AJ_AV unflections (only when they are not the first entry). */
if (Thqcount > Thqunfindex[0])
switch (Stageptr->en_rel)
{
case VB_NN:
case AJ_AV:
if (index > 0)
{
delunf(index);
continue;
}
default:
break;
}
/* Restrict the part-of-speech control to that of this unflection. */
Thqcontrol &= ~TH_ALLPOS;
Thqcontrol |= Thqunfpos[index];
savemean(index + 1, &nummean);
if (Thqstate == TH_DONE)
return (meanreq - nummean);
else if (!nummean)
return (meanreq);
/* If no meanings are found for an unflected form, remove it. */
if (Thqunfindex[index] == Thqcount)
delunf(index);
else
Thqunfindex[++index] = Thqcount;
}
Thqstate = TH_DONE;
/* fall through */
case TH_DONE:
return (meanreq - nummean);
default:
break;
}
return (FALSE);
}
static thsinit(query, allpos)
char *query;
int *allpos;
{
long theswtoh();
char *strecpy();
char *strptr;
int i;
char *start;
char tmpstr[LONGWORD];
Thqcontrol = TH_ALLPOS + TH_ALLLIST + TH_INFSYN;
/* Initialize all values. */
Stageptr = NULL;
for (i = MAXUNF; --i >= 0;)
{
Thqunfpos[i] = NPOS;
Thqunfindex[i] = MAXMEAN;
Thqnumdesc[i] = MAXMEAN;
Thqworddesc[i] = ERROR;
Thqstage[i] = NULL;
Thqmultunf[i] = NULL;
}
Unfquery = NULL;
Thqcount = 0;
Thqunfquery[0] = '\0';
Thqtblind = 0;
Thqendtab = 0;
Thqphrase = FALSE;
for (i = Thqnstrarr; --i >= 0;)
Thmstrarr[i] = NULL;
/* Obtain the hash value for the query word. */
if ((Thqdesc = theswtoh((char *) Thqquery)) < 0)
{
/* If the query is an inflected phrase, check
the component words for validity. */
if (Thqspecial)
{
strptr = strecpy(Thqquery, query) - 1;
if (*strptr == '.')
*strptr = '\0';
}
start = Thqquery;
do
{
strptr = start;
while (*strptr && *strptr != ' ')
++strptr;
if (*strptr == ' ')
Thqphrase = TRUE;
if (*strptr == '\0')
{
doflags(start, tmpstr, MAXWORD);
if (!Thqphrase
|| theswtoh((char *) tmpstr) < 0)
return (FALSE);
}
else
{
*strptr = '\0';
doflags(start, tmpstr, MAXWORD);
if (theswtoh((char *) tmpstr) < 0)
return (FALSE);
*strptr = ' ';
start = strptr + 1;
}
}
while (*strptr);
}
/* At this stage, an acceptable query must either be a valid
phrase or a word in the lexicon. If neither, return failure. */
if (Thqdesc < 0 && !Thqphrase)
return (FALSE);
/* Seek to start of thesaurus info for the
word and get the indirect pointers. */
if (Thqphrase && Thqdesc < 0)
Thqnumdesc[0] = 0;
else
Thqnumdesc[0] = thesfetch(Thqdesc);
/* Set the thesaurus state to indicate the
presence of information for the query. */
Thqstate = TH_MORE;
return (TRUE);
}
/* Given a word descriptor, fetch all its meaning descriptors. */
static thesfetch(worddesc)
long worddesc;
{
long binloc();
HANDLE fptr;
long binend;
long binstart;
int binid;
fptr = Tlexfile;
/* Figure out where bin starts and how long it is. */
binid = binnum(worddesc);
binstart = binloc(binid, Thesidata, Thesibin,
Thesiadd, Thesniadd);
binend = binloc(binid + 1, Thesidata, Thesibin,
Thesiadd, Thesniadd);
/* If bin length is zero, the query word can't be in it. */
if (binend == binstart)
return (FALSE);
/* Seek to the start of bin. */
thesblk(fptr, binstart, 1);
/* Decompress the bin until the search
key for this worddesc is found. */
return (findcode(fptr, (int) codeval(worddesc),
(int) (binend - binstart)));
}
/* Walk the indirect descriptors belonging to descriptor group
   "inddesc" (from Thqtblind up to Thqnumdesc[inddesc]) and record in
   Thqaddr[] the address of every meaning that savedir() accepts.
   *nummean is the number of meanings still wanted; it is decremented
   for each meaning saved and the walk stops when it reaches zero.
   Thqstate becomes TH_DONE once MAXMEAN meanings are held.
   Returns the number of meanings saved by this call. */
static savemean(inddesc, nummean)
int inddesc;
int *nummean;
{
    int stored;

    stored = 0;
    for (;;)
    {
        /* Stop when the group is exhausted or enough meanings exist. */
        if (Thqtblind >= Thqnumdesc[inddesc] || !*nummean)
            break;

        if (savedir(Thqindtab[Thqtblind++], (int) Thqcount))
        {
            Thqaddr[Thqcount] = Thqcuraddr;
            Thqcount++;
            *nummean -= 1;
            stored++;
        }

        /* The meaning tables are full: no further lookups. */
        if (Thqcount >= MAXMEAN)
        {
            Thqstate = TH_DONE;
            break;
        }
    }
    return (stored);
}
/* Locate the meaning referred to by the indirect descriptor
   "inddesc", leave its bit address in Thqcuraddr and load its header
   and definition through getmean() as meaning number "meanno".
   Returns TRUE if the meaning's part of speech is acceptable
   (goodpos() or thpostst()), FALSE otherwise.  The caller stores
   Thqcuraddr in Thqaddr[] when TRUE is returned. */
static savedir(inddesc, meanno)
int inddesc;
int meanno;
{
    long binloc();
    HANDLE fp;
    long binbase;
    long bitaddr;
    int bin;
    int remaining;
    int lists;
    char defbuf[MAXDEF];

    fp = Tlexfile;

    /* Split the descriptor into a bin and a position within the bin. */
    bin = meanbin(inddesc);
    remaining = meannum(inddesc);
    binbase = binloc(bin, Thesmdata, Thesmbin,
        Thesmadd, Thesnmadd);

    /* Position at the bin and step over the meanings that precede
       the wanted one, tracking the address in bits. */
    thesblk(fp, binbase, 1);
    bitaddr = binbase * 8;
    for (; remaining > 0; remaining--)
    {
        lists = (int) thesbit(fp, Thesposfld) & LISTBITS;
        bitaddr += Thesposfld;
        bitaddr += skipmean(lists, fp);
    }

    /* Remember where the meaning starts, then read it. */
    Thqcuraddr = bitaddr;
    getmean(fp, meanno, defbuf, INIT);

    /* goodpos() is consulted first; thpostst() only if it declines. */
    if (goodpos((int)Thmpos))
        return (TRUE);
    if (thpostst(Thmpos))
        return (TRUE);
    return (FALSE);
}
/* Skip over the rest of a meaning whose part-of-speech field has
   already been read.  "listnib" holds the list bits of that field.
   Reads the count field(s) of every list present, then the skip
   byte, and skips the hashes and the encoded definition.
   Returns the number of bits consumed. */
static skipmean(listnib, fptr)
int listnib;
HANDLE fptr;
{
    int bitsread;
    int pending;
    int list;
    int chunk;

    bitsread = 0;
    pending = 0;
    list = 0;
    do
    {
        /* A count equal to LISTMAX is continued in the next field. */
        do
        {
            chunk = thesbit(fptr, LISTFLD);
            bitsread += LISTFLD;
            pending += chunk;
        }
        while (chunk == LISTMAX);
        list = nextlist(list, listnib);
    }
    while (list < MAXLISTS);

    /* Hash entries are Thesdirfld bits each; the skip byte gives the
       length of the encoded definition in bytes. */
    pending *= Thesdirfld;
    pending += (int) thesbit(fptr, 8) * 8;
    bitsread += 8 + pending;
    thesskip(fptr, pending);
    return (bitsread);
}
/* Given the search key "code", determine from the bin header the number
of records to be skipped. No meanings are available if the bit
corresponding to "code" is not set (in the header). If the presence
of meanings is indicated, append all the meaning pointers of the
record to Thqindtab[], starting at Thqendtab.
fptr is positioned at the start of the bin; binlen is the bin length
in bytes. Returns the new value of Thqendtab, or FALSE if "code" has
no record or the bin is exhausted while skipping records. */
static findcode(fptr, code, binlen)
HANDLE fptr;
int code;
int binlen;
{
char cdmask;
int skip;
int inddesc;
int todo;
int endtab;
/* todo counts the header bytes that have not been read yet. */
todo = IBINHDR;
skip = 0;
endtab = Thqendtab;
/* Work in bits from here on. */
binlen *= 8;
/* Read header bytes until the one holding the bit for "code",
counting the set bits of every byte passed over. */
while (TRUE)
{
cdmask = (unsigned) thesbit(fptr, 8);
--todo;
if ((code -= 8) < 0)
{
code += 8;
break;
}
/* Count the set bits; the loop ends once the mask is shifted to zero. */
do
if (0x80 & cdmask)
++skip;
while (cdmask <<= 1);
}
/* Count the set bits that precede "code" within its own byte. */
while (--code >= 0)
{
if (0x80 & cdmask)
++skip;
cdmask <<= 1;
}
/* The bit for "code" itself is now the top bit of the mask. */
if (!(0x80 & cdmask))
return (FALSE);
/* Skip the remainder of the header. */
thesskip(fptr, todo << 3);
/* Skip the records of the codes that precede this one; each record
is a run of INDFLD-bit fields ended by one for which eofind() holds. */
while (--skip >= 0)
{
do
if ((binlen -= INDFLD) < 0)
return (FALSE);
while (!eofind(thesbit(fptr, INDFLD)));
}
/* Copy this record's fields, including the terminating one.
NOTE(review): endtab is not checked against the size of Thqindtab
(MAXMEAN * 2) and binlen is not consulted here - confirm the data
format bounds the record length. */
while (TRUE)
{
Thqindtab[endtab++] = inddesc
= (int) thesbit(fptr, INDFLD);
if (eofind(inddesc))
break;
}
/* Mark end of indirect pointer table. */
Thqindtab[endtab] = ERROR;
return (Thqendtab = endtab);
}
/* Theshuff will use the huffman tables for the thesaurus
and decompress the definition string from current file
position. Note that the definition is '\0' code terminated,
and is not byte aligned on either direction. It however
is preceded by the skip byte, which by this stage must
have already been passed.
fptr is the input file, start the output buffer and skipcount
the value of the skip byte, used to limit how many input bytes are
fetched. Decoded values below HUFFCH are stored as characters; values
of HUFFCH or more are replaced by the word Theshuffind[value - HUFFCH]
surrounded by single spaces. A trailing space is removed from the
result. No value is returned.
NOTE(review): the output length is not bounded here - confirm that
the caller's buffer is sized for the longest definition. */
static theshuff(fptr, start, skipcount)
HANDLE fptr;
char *start;
int skipcount;
{
int scratch; /* Difference between input and code */
int index; /* Index into translation table */
int bitsleft; /* Number of bits left in current byte */
int currchar; /* Current input byte being processed */
int nextchar; /* Next input byte to be processed */
unsigned *table;
char *xlate;
char *str;
int newch;
int i;
/* Initialize the unpacking variables. */
str = start;
scratch = 0L;
bitsleft = 0;
table = Thescnttab;
xlate = Thesxlate;
/* Prefetch the first input byte. */
nextchar = thesbit(fptr, 8);
--skipcount;
/* Decode one symbol per iteration until a '\0' has been stored. */
do
{
index = 0;
scratch &= 0xFF;
/* Loop through the entire table, if necessary. */
for (i = 0; i < MAXBITS; i++)
{
/* Assuming we don't match anything of length i,
add the current table entry to the index. */
index += table[i];
/* If the current byte is exhausted, fetch another. */
if (!bitsleft--)
{
bitsleft = 7;
currchar = nextchar;
/* Read ahead only while the skip count allows it. */
if (--skipcount >= 0)
nextchar = thesbit(fptr, 8);
scratch |= currchar;
}
/* Shift the next bit in. If we have received
a code of length i, scratch will be less than
the current table entry, and will correspond
to code number table[i] - scratch of length i. */
scratch <<= 1;
scratch -= table[i];
/* If scratch is less than zero, we have received
code number - scratch of length i. Adding that
to index should give us the absolute index into
the translate table. */
if (scratch < 0L)
{
index += (int) scratch;
break;
}
}
/* The table entries are kept shifted left by 8 bits (see thesopen),
so the translation index is in the second byte of index. */
newch = ctoi(xlate[(index >> 8) & 0xFF]);
if (newch < HUFFCH)
*str = newch;
else
{
/* Expand an encoded word, separated from its neighbours by spaces. */
if (str != start && *(str - 1) != ' ')
*str++ = ' ';
str = (char *) strecpy((char *) str,
Theshuffind[newch - HUFFCH]);
*str = ' ';
}
}
while (*str++ != '\0');
/* str is one past the terminator; drop a space left just before it. */
if (*(str -= 2) == ' ')
*str = '\0';
}
/* Compute the byte location of bin "binid" in the thesaurus file.
   "table" holds the index value of each bin and "offset" is the
   base the values are relative to.  "addt" is the overflow table of
   "naddt" entries: OVERFLOW is added once for each leading entry
   that does not exceed binid. */
static long binloc(binid, offset, table, addt, naddt)
int binid;
unsigned *table;
unsigned *addt;
int naddt;
long offset;
{
    unsigned *scan;
    unsigned *limit;

    /* Add overflow for this bin number; stop at the first entry
       that is larger than the bin number. */
    limit = addt + naddt;
    for (scan = addt; scan < limit; scan++)
    {
        if (binid < *scan)
            break;
        offset += OVERFLOW;
    }

    /* Return the bin index value. */
    return (offset + (long) table[binid]);
}
/* Fetch the information pertaining to a meaning.
fptr - thesaurus file.
meanno - number of the meaning (index into Thqaddr[]).
defstr - buffer that receives the definition record.
skip - INIT: the file is already positioned at the meaning; read
the header and definition, skip the hashes and return.
MEAN (0): seek to Thqaddr[meanno] first, and after the
definition also load the hashes of every list.
On return Thmpos, Thmmeanno and Thmcount[] describe the meaning and
Thmstrend points just past the text written to defstr. When skip is
zero, Thmhashes[], Thmhstart[] and Thmlist[] are filled as well.
The returned value is a bit count for the record (part-of-speech
field, hashes, skip byte and definition).
NOTE(review): unlike skipmean(), the count fields (LISTFLD bits each)
are not included in the returned total; the only caller visible here
(savedir) does not use the value afterwards. */
static getmean(fptr, meanno, defstr, skip)
HANDLE fptr;
int meanno;
char *defstr;
int skip;
{
int listno;
int listcnt;
int unfmean;
int ind;
int listnib;
int totcount;
int totskip;
int huffskip;
int skipcount;
int offset;
int strcnt;
int hashcnt;
long blkaddr;
long worddesc;
long hash;
char unfstr[MAXSTR * 2];
/* Initialize all the counts. */
hashcnt = strcnt = 0;
ind = 0;
skipcount = 0;
unfmean = FALSE;
/* Determine the word descriptor for which it is a meaning. */
while (ind < MAXUNF)
if (meanno < Thqunfindex[ind++])
break;
/* ind is now one past the first boundary above meanno; after the
adjustment a negative value means the query word itself, otherwise
ind is the index of the unflection the meaning belongs to. */
ind -= 2;
if (ind < 0)
worddesc = Thqdesc;
else
{
unfmean = TRUE;
worddesc = Thqworddesc[ind];
}
if (skip != INIT)
{
/* Thqaddr[] holds bit addresses: seek to the byte, then consume
the leading bits. */
blkaddr = Thqaddr[meanno];
offset = blkaddr & 0x7;
blkaddr >>= 3;
thesblk(fptr, blkaddr, 1);
thesbit(fptr, offset);
}
/* The first field carries both the part of speech and the list bits. */
Thmpos = (int) thesbit(fptr, Thesposfld);
totskip = Thesposfld;
listnib = Thmpos & LISTBITS;
Thmpos &= POSBITS;
Thmmeanno = meanno;
for (listno = 0; listno < MAXLISTS; listno++)
Thmcount[listno] = 0;
listno = 0;
/* Read the count of each list present; a count equal to LISTMAX is
continued in the following field. */
do
do
{
listcnt = thesbit(fptr, LISTFLD);
Thmcount[listno] += listcnt;
skipcount += listcnt;
}
while (listcnt == LISTMAX);
while ((listno = nextlist(listno, listnib)) < MAXLISTS);
/* Convert the number of hashes into a number of bits. */
skipcount *= Thesdirfld;
/* Decompress the meaning definition into the string section */
Thmstrend = defstr;
/* First byte: part of speech plus one. */
*Thmstrend++ = Thmpos + 1;
if (unfmean)
{
/* Second byte: relation of the unflection, followed by the
unflected word and its terminator. */
*Thmstrend++ = Thqstage[ind]->en_rel;
Thmstrend =
strecpy(Thmstrend, Thqmultunf[ind]) + 1;
}
else
*Thmstrend++ = FALSE;
/* The skip byte gives the length of the encoded definition in bytes. */
huffskip = (int) thesbit(fptr, 8);
theshuff(fptr, (char *) Thmstrend, huffskip);
if (Thqcontrol & TH_UNFDEF)
Thmstrend = strchr(Thmstrend, 0) + 1;
else
{
/* Let dispunf() build the text in unfstr, then copy it back over
the start of defstr. */
dispunf(Thqquery, unfstr, meanno + 1, defstr);
Thmstrend = strecpy(defstr, unfstr) + 1;
}
totskip += skipcount + 8 + huffskip * 8;
/* If skip is nonzero, skip over the hashes to the end of the record */
if (skip)
{
thesskip(fptr, skipcount);
return (totskip);
}
/* For each list that is present, save the hash values.
Mark the indices of both the Thmstrarr and Thmhashes
arrays in positions where they belong. */
for (listno = 0; listno < MAXLISTS; listno++)
{
/* The current hash count is saved to indicate
where the values for the current list start. */
Thmhstart[listno] = hashcnt;
totcount = Thmcount[listno];
/* The hashes themselves are now saved in the
Hashes array, except when the hash matches the
current query's hash. */
for (listcnt = 0; listcnt < totcount; listcnt++)
{
hash = thesbit(fptr, Thesdirfld);
/* NOTE(review): the limit is MAXHASH while Thmhashes was allocated
with Thesmaxhash + 1 entries in thesopen() - confirm they agree. */
if (hashcnt < MAXHASH && hash != worddesc)
Thmhashes[hashcnt++] = hash;
else
--Thmcount[listno];
}
/* The Thes area is updated to indicate where exactly
in the string pointer array the strings for the
current list begin. */
Thmlist[listno] = Thmstrarr + strcnt;
strcnt += Thmcount[listno];
/* Each list is followed by a NULL terminator slot. */
Thmstrarr[strcnt++] = NULL;
}
return (totskip);
}
/* Convert the hash values of one meaning into strings; this will enable
parallel display/fetch implementation for space/time critical output.
meanno - 1-based number of the meaning (it is decremented on entry).
lists - receives, for each requested list, a pointer to that list's
NULL terminated array of word pointers.
The function will return a pointer to the definition string
(Thmstrings) if the meaning is found, or NULL in case of a failure:
meanno out of range, an unexpected Thqstate, or string space
exhausted. */
char *thesinfo(meanno, lists)
int meanno;
char ***lists;
{
END *getstage();
int j;
int k;
int thp;
int qflag; /* flags for the query */
int flag; /* flags for words in the lists */
int posflag; /* part of speech of the meaning */
int listreq;
int doinf;
int listcnt;
char **listptr;
char phword[LONGWORD]; /* phonetically encoded lex word */
char infword[LONGWORD]; /* inflected form of the word */
char wordbuf[LONGWORD]; /* buffer */
/* Convert to a 0-based meaning number. */
if (--meanno >= Thqcount)
return(NULL);
/* Use the state bits of thesquery */
switch (Thqstate)
{
case TH_DONE:
case TH_UNFL:
case TH_MORE:
if (meanno >= Thqcount || meanno < 0)
{
Thqstate = TH_DONE;
return (NULL);
}
/* Load the definition and the hashes of the meaning. */
getmean(Tlexfile, meanno, Thmstrings, MEAN);
listreq = (Thqcontrol & TH_ALLLIST) >> NPOS;
doinf = thinftst(meanno, Thqunfindex[0]);
/* Reduce the part of speech to NOUN, VERB or neither for inflect(). */
if (Thmpos == NOUN || Thmpos == SPNOUN)
posflag = NOUN;
else if (Thmpos == VERB || Thmpos == VERBSP)
posflag = VERB;
else
posflag = NPOS;
break;
default:
return (NULL);
}
qflag = Qflag & (IW_COMMON | IW_PROPER | IW_ENDDOT);
/* Find the unflection stage, if any. */
Stageptr = getstage(meanno, (int)Thmpos, Thqunfindex,
Thqstage);
/* Convert all the hashes that we collected into strings.
The string pointers get saved as we go along in the slots that
getmean() reserved for each list.
Also, all hashes that were looked up are marked with HS_SEEN bit.
*/
for (j = 0; j < MAXLISTS; j++)
{
/* A list that was not requested is returned empty. */
if (!(listreq & 1 << j))
{
*Thmlist[j] = NULL;
continue;
}
*lists++ = (char **) Thmlist[j];
listptr = Thmlist[j];
thp = Thmhstart[j];
/* The count is cached because Thmcount[j] is adjusted in the loop. */
listcnt = Thmcount[j];
for (k = 0; k < listcnt; k++)
{
Thmhashes[thp] |= HS_SEEN;
/* if hash decoding error, just ignore the hash */
/* NOTE(review): Thmcount[j] is not decremented on this path, unlike
the other paths below that drop a word - confirm this is intended. */
if (theshash(Thmhashes[thp++] & DIRBITS,
phword) == ERROR)
continue;
flag = ctoi(Tscfdecomp[0]);
lextoasc(phword, wordbuf, flag);
if (doinf)
{
/* Proper words are re-flagged before they are inflected. */
if (flag == IW_PROPER)
{
if (doflags(wordbuf, infword,
MAXWORD) == ERROR)
{
Thmcount[j]--;
continue;
}
strecpy(wordbuf, infword);
}
/* Words that cannot be inflected are dropped from the list. */
if (inflect(wordbuf, infword, posflag,
Stageptr))
{
if ((flag == IW_PROPER)
&& Stageptr->en_rel != RE_CM
&& Stageptr->en_rel != RE_SU)
undoflags(infword, flag, wordbuf);
else
strecpy(wordbuf, infword);
}
else
{
Thmcount[j]--;
continue;
}
}
/* copy the word in wordbuf (transformed, if
necessary, to the format of the query) to
Thmstrend.
*/
setqflags(wordbuf, qflag, flag, doinf);
*listptr++ = Thmstrend;
Thmstrend = strchr(Thmstrend, 0) + 1;
/* If string space is exhausted, return graciously */
/* NOTE(review): the check happens after the word has been written;
thesopen() allocates MAXWORD bytes beyond Thmsentinel, presumably as
slack for this - confirm. */
if (Thmstrend >= Thmsentinel)
return (NULL);
}
*listptr = NULL;
}
return ((char *) Thmstrings);
}
/* Write "inword" at Thmstrend, carrying over the flags of the query
   when they are stronger than the word's own.

   inword - word (or several alternatives separated by UNFSEP or
            INFSEP); when inf is set the separators are overwritten
            with '\0' in place.
   qflag  - flags of the query.
   flag   - flags of the word.
   inf    - nonzero if inword may hold several alternatives.

   In the output UNFSEP separators are kept and INFSEP separators
   are replaced by INFALT.  Thmstrend itself is not advanced. */
static setqflags(inword, qflag, flag, inf)
char *inword;
int qflag;
int flag;
int inf;
{
    char *seg;
    char *scan;
    char *out;
    int sep;
    int useqflag;

    /* check if the flags corresponding to the query have to be
       transferred to inword. */
    useqflag = (qflag & IW_DOTS) > (flag & IW_DOTS)
        || (qflag & IW_CASE) > (flag & IW_CASE);
    if (useqflag)
        flag = qflag & ~(flag & IW_ENDDOT);

    /* A single word is copied (or re-flagged) in one go. */
    if (!inf)
    {
        if (useqflag)
            undoflags(inword, flag, Thmstrend);
        else
            strecpy(Thmstrend, inword);
        return;
    }

    /* If inword is a multiple word (e.g. broke/broken,
       leveled or levelled) apply the new flag to each word. */
    out = Thmstrend;
    seg = inword;
    for (;;)
    {
        /* Find the end of the current alternative. */
        scan = seg;
        while (*scan && *scan != UNFSEP && *scan != INFSEP)
            ++scan;
        sep = *scan;
        *scan = 0;

        if (useqflag)
        {
            undoflags(seg, flag, out);
            out = strchr(out, 0);
        }
        else
            out = strecpy(out, seg);

        if (!sep)
            return;
        if (sep == UNFSEP)
            *out++ = UNFSEP;
        else
            out = strecpy(out, (char *) INFALT);
        seg = scan + 1;
    }
}
/* Get the next list's number from the list bits "pos".
   Starting at bit position "listno", find the first position whose
   bit (HAS_CMP shifted left by that position) is set in pos and
   return that position plus one.  If no such bit is found below
   MAXLISTS the result is MAXLISTS + 1, which ends the callers'
   "< MAXLISTS" loops. */
static nextlist(listno, pos)
int listno;
int pos;
{
    for (; listno < MAXLISTS; listno++)
    {
        /* "<<" binds tighter than "&": the shifted mask is tested. */
        if (pos & HAS_CMP << listno)
            break;
    }
    return (listno + 1);
}
/* Read in a table of "lentable" 16-bit values (low byte first) from
   fptr.  The space required for the table is obtained from zalloc()
   and its address is stored in *table.
   Returns TRUE on success (an empty table leaves *table untouched)
   and FALSE if sufficient space cannot be allocated. */
static getshtab(fptr, table, lentable)
HANDLE fptr;
unsigned **table;
int lentable;
{
    unsigned *entries;
    int nbytes;
    int low;
    int high;
    int n;

    /* Nothing to read for an empty table. */
    if (lentable == 0)
        return (TRUE);

    nbytes = lentable * sizeof(unsigned);
    entries = (unsigned *) zalloc(_THID, (unsigned)nbytes);
    if (entries == NULL)
        return (FALSE);
    *table = entries;

    /* Assemble each entry from two consecutive bytes. */
    for (n = 0; n < lentable; n++)
    {
        low = thesbit(fptr, 8);
        high = thesbit(fptr, 8);
        entries[n] = ctoi(low) | (ctoi(high) << 8);
    }
    return (TRUE);
}
/* Determine the unflection stage for a meaning.

   meanno   - 0-based meaning number.
   pos      - part of speech of the meaning.
   unfindex - meaning-number boundaries: meanings below unfindex[0]
              belong to the query itself, those from unfindex[k] up
              to unfindex[k + 1] to unflection k.
   stage    - stage entry of each unflection.

   Returns NULL for a meaning of the query itself or when no stage
   entry is recorded for the unflection; otherwise the stage entry,
   replaced by unfstage(SV_PX + 1) or unfstage(SP_PL + 1) for the
   VERBSP / SPNOUN special cases.

   The previous guard, "ind != MAXUNF", could never be false after
   the "ind -= 2" adjustment, so a NULL stage entry was dereferenced
   when pos was VERBSP or SPNOUN.  The index is now range-checked
   and the entry is tested for NULL before use. */
static END *getstage(meanno, pos, unfindex, stage)
int meanno;
int pos;
short *unfindex;
END **stage;
{
    END *unfstage();
    int ind;
    END *stgptr;

    ind = 0;
    stgptr = NULL;

    /* Meanings of the query word itself have no unflection stage. */
    if (meanno < unfindex[ind++])
        return (stgptr);

    /* Find the first boundary above meanno. */
    while (ind < MAXUNF)
    {
        if (meanno < unfindex[ind++])
            break;
    }

    /* ind is one past that boundary; the owning unflection is two
       positions back. */
    ind -= 2;
    if (ind >= 0 && ind < MAXUNF && stage[ind] != NULL)
    {
        stgptr = stage[ind];
        if (pos == VERBSP && stgptr->en_rel == PR_PX)
            stgptr = unfstage(SV_PX + 1);
        else if (pos == SPNOUN && stgptr->en_rel == SI_PL)
            stgptr = unfstage(SP_PL + 1);
    }
    return (stgptr);
}
/* Inflect can produce multiple inflections separated by UNFSEP.
   Check if "word" matches any of the inflections in "infword".

   word    - the word to look for.
   infword - one or more inflections separated by UNFSEP.  The
             buffer is modified: every separator that is passed is
             overwritten with '\0'.

   Returns TRUE if word equals one of the inflections, FALSE
   otherwise.

   The previous version advanced "word" instead of "infword" after
   a failed comparison, so only the first inflection was ever
   compared with the caller's word; the later comparisons tested
   the tail of infword against its own first inflection. */
static isinf(word, infword)
char *word;
char *infword;
{
    char *wrdptr;

    wrdptr = infword;
    while (*wrdptr != '\0')
        if (*wrdptr++ == UNFSEP)
        {
            /* Terminate the current inflection and compare it. */
            *(wrdptr - 1) = '\0';
            if (!strcmp(word, infword))
                return (TRUE);
            /* Move on to the inflection after the separator. */
            infword = wrdptr;
        }

    /* Compare the last (or only) inflection. */
    return (!strcmp(word, infword));
}
/* Read in the strings in the definitions that were replaced by
   non-printing characters prior to huffman encoding.
   Thesnumstr newline-terminated strings are read from fptr into
   Theshuffstr; each newline is replaced by '\0' and the start of
   each string is recorded in Theshuffind[].
   Returns the number of bytes read. */
static getstrtab(fptr)
HANDLE fptr;
{
    char *dst;
    int nread;
    int n;

    nread = 0;
    dst = Theshuffstr;
    for (n = 0; n < Thesnumstr; n++)
    {
        Theshuffind[n] = dst;

        /* Copy bytes up to and including the newline. */
        do
        {
            nread++;
            *dst = thesbit(fptr,8);
        }
        while (*dst++ != '\n');

        /* Turn the newline into the string terminator. */
        *(dst - 1) = '\0';
    }
    return (nread);
}
/* Remove all values relating to a particular unflection.
The returned value is the number of meanings that will
be lost as a result of discarding the unflection.
*/
static delunf(ind)
int ind;
{
while (ind < MAXUNF && Thqmultunf[ind] != NULL)
{
Thqmultunf[ind] = Thqmultunf[ind + 1];
Thqstage[ind] = Thqstage[ind + 1];
Thqunfpos[ind] = Thqunfpos[ind + 1];
Thqworddesc[ind] = Thqworddesc[ind + 1];
ind++;
if (ind < MAXUNF - 1)
Thqnumdesc[ind] = Thqnumdesc[ind + 1];
}
}
/* Check the unflection at index "ind" against the controls used to
   retrieve information.

   ind    - index into Thqstage[] / Thqunfpos[].
   allpos - nonzero if all parts of speech are desired.

   Returns TRUE if the unflection is to be discarded and FALSE if it
   is to be kept.  When only nouns or only verbs are wanted and the
   stage covers both, Thqstage[ind] and Thqunfpos[ind] are advanced
   to a more specific stage entry.

   Changes from the previous version:
   - "stage" was assigned only when Thqunfindex[0] > 0 but was
     dereferenced afterwards in every case; it is now always loaded,
     and an unflection without a stage entry is discarded.
   - "unfpos" was a char although it holds part-of-speech flag
     masks that are tested against the long Thqcontrol; it is now an
     int so that the masks cannot be truncated or sign-extended. */
static badunf(ind, allpos)
int ind;
int allpos;
{
    register END *stage;
    register int unfpos;
    int noun;
    int verb;

    /* Without a stage entry the unflection cannot be evaluated. */
    stage = Thqstage[ind];
    if (stage == NULL) {
        return (TRUE);
    }

    /* If the query itself has meanings, accept only the relations
       listed here. */
    if (Thqunfindex[0] > 0) {
        switch (stage->en_rel) {
        case PR_PS :
        case PR_PP :
        case PR_PX :
        case PR_PC :
        case SV_PX :
        case SI_PL :
        case SP_PL :
        case TP_SI :
        case RE_CM :
        case RE_SU :
        case TP_SP :
        case TP_FP :
            break;
        default:
            return (TRUE);
        }
    }

    noun = Thqcontrol & (TH_NOUN | TH_SPNOUN);
    verb = Thqcontrol & (TH_VERB | TH_VERBSP);

    /* Discard a PR_PC / PR_PX unflection that repeats the stage of
       the previous entry once unflected meanings have been saved. */
    if (ind > 0
        && Thqcount != Thqunfindex[0]
        && Thqstage[ind] == Thqstage[ind - 1]
        && (stage->en_rel == PR_PC || stage->en_rel == PR_PX)) {
        return (TRUE);
    }

    /* If all parts of speech are desired, preserve the unflection */
    if (allpos) {
        return (FALSE);
    }

    /* The stage covers both noun and verb but only one of them is
       wanted: move to the stage entry for that one. */
    if ((stage->en_pos & TH_NOUN)
        && (stage->en_pos & (TH_VERB | TH_VERBSP))
        && !(noun && verb)) {
        if (noun) {
            Thqstage[ind]++;
        } else {
            Thqstage[ind] += 3;
        }
        Thqunfpos[ind] = Thqstage[ind]->en_pos;
    }
    if (Thqcontrol & Thqstage[ind]->en_pos) {
        return (FALSE);
    }

    /* Make allowance for those unflections which involve a change
       in the part of speech, e.g. adjective -> noun */
    switch (Thqstage[ind]->en_rel) {
    case AJ_NN:
        unfpos = TH_NOUN | TH_SPNOUN;
        break;
    case AJ_AV:
        unfpos = TH_ADV;
        break;
    case VB_AJ:
        unfpos = TH_ADJ;
        break;
    case VB_NN:
        unfpos = TH_NOUN | TH_SPNOUN;
        break;
    default:
        return (TRUE);
    }
    if (Thqcontrol & unfpos) {
        return (FALSE);
    }
    return (TRUE);
}
/* Determine if a particular part of speech is acceptable even
   if Thqcontrol indicates that it is not.  This check is relevant
   only for those meanings that result from an unflection of the
   query, i.e. while Thqstate is TH_UNFL.

   pos - part of speech of the meaning.

   Only VERBSP (with TH_VERB wanted and a PR_PX stage) and SPNOUN
   (with TH_NOUN wanted and a SI_PL stage) can qualify; Stageptr is
   then switched to the corresponding special stage.  If unfexc()
   supplies exception forms, the query must match one of them.
   Returns TRUE if the part of speech is acceptable, else FALSE. */
static goodpos(pos)
int pos;
{
    END *unfstage();
    UCHAR infword[MAXWORD];

    if (Thqstate != TH_UNFL)
        return (FALSE);

    if (pos == VERBSP)
    {
        if (!(Thqcontrol & TH_VERB) || Stageptr->en_rel != PR_PX)
            return (FALSE);
        Stageptr = unfstage(SV_PX + 1);
    }
    else if (pos == SPNOUN)
    {
        if (!(Thqcontrol & TH_NOUN) || Stageptr->en_rel != SI_PL)
            return (FALSE);
        Stageptr = unfstage(SP_PL + 1);
    }
    else
        return (FALSE);

    /* An exception entry overrides the regular inflection. */
    if (unfexc(Unfquery, infword, Stageptr->en_rel) > 0)
        return (isinf(Thqquery, infword));
    return (TRUE);
}