2023-03-04 03:45:20 +01:00

1 line
6.9 KiB
C
Executable File

/***********************************************************************\
Filename: thesutil.c
\***********************************************************************/
extern pascal int Tlexcmp();
extern pascal void Tlexphon();
/*#include <ctype.h>*/
#include "proxio.h"
#include "tlex.h"
#include "spmemory.h"
#include "scdef.h"
#include "tenviron.h"
#include "thesmisc.h"
/* forward declaration: remphon is defined later in this file */
static remphon();
/* block reader used by theswtoh, theshash and thesblk; defined elsewhere */
char *memread();
/* NOTE(review): the original comment here read "keep a global lexicon */
/* pointer for unftry", but no such pointer is declared below - it looks */
/* stale.  The statics that follow are the state of the bit-stream reader */
/* shared by thesbit, thesskip and thesblk. */
/* byte currently being consumed, loaded with ctoi(*Bufp++) */
static int Curbyte;
/* number of bits of Curbyte that have not been consumed yet */
static int Bitsleft = 0;
/* block number most recently handed to memread by thesblk */
static int Blknum = 0;
/* block returned by the last memread call in thesblk (NULL on failure) */
static char *Memblk;
/* last byte of the current block: Memblk + MM_BLKSIZE - 1 */
static char *Bufend;
/* next byte of the current block to be loaded into Curbyte */
static char *Bufp;
/*
 * Return the thesaurus hash value of a word.
 *
 * The word is phonetically encoded with phfull(); Tlexblknum() is then
 * called on the encoded form and the block to search is taken from
 * Tsccurblk.  The hash is the block number shifted left by WORDFLD, plus
 * the number of entries decompressed from that block before the encoded
 * word matched.  A word equal to the block's index entry
 * (Tlexindextab[blknum]) therefore hashes to the bare shifted block number.
 *
 * word    - the word to hash.
 * returns - the hash, or ERROR when the encoding fails, the block cannot
 *           be read, or the word is not found before the end of the block.
 *
 * Side effects: overwrites Tlexblk, Tlexblkptr, Tscendptr and the
 * decompression buffer Tscdecomp.
 */
long theswtoh(word)
char *word;
{
    char *lexword = Tscdecomp;
    int wcount;
    int blknum;
    long hash;
    char peword[MAXPHWORD];

    /* phonetically encode word */
    if (phfull(word, peword, Tlexprod & SHORTLEX) == ERROR)
        return (ERROR);

    /* compute block to search */
    Tlexblknum(peword);
    blknum = Tsccurblk;

    /* if seed word matches the query word, simply return the hash */
    /* of the block */
    hash = (long) blknum << WORDFLD;
    if (Tlexcmp(peword, Tlexindextab[blknum]) == 0)
        return (hash);

    /* read block into memory */
    if ((Tlexblk = memread(blknum + Tlexoffset, Tlexfile, MM_READ | MM_PR3)) ==
        NULL)
        return (ERROR);

    /* prepare for sequential decompression of block: start after the */
    /* first BLOCKSEGS bytes, with the index word as the previous entry */
    wcount = 0;
    Tlexblkptr = Tlexblk + BLOCKSEGS;
    Tscendptr = strecpy(lexword, Tlexindextab[blknum]);

    /* decompress until end of block (a zero byte) or word found */
    while (*Tlexblkptr)
    {
        Tlexbinnext();
        ++wcount;
        if (!Tlexcmp(peword, Tscdecomp))
            return ((long) wcount + hash);
    }
    return (ERROR);
}
/* given a hash value, return the corresponding word from the lexicon */
/* */
/* hash   - block number in the bits above WORDFLD, word count within the */
/*          block in the bits selected by WORDBITS */
/* phword - receives the word (still phonetically encoded); set to the */
/*          empty string before anything else is done */
/* returns TRUE on success; ERROR when the block number is not below */
/* Tlexnindex, when the block cannot be read, or when Tlexbinnext() */
/* returns zero before the requested word has been reached */
/* side effects: sets Tsccurblk, Tlexblk, Tlexblkptr, Tscendptr and */
/* overwrites Tscdecomp */
theshash(hash, phword)
long hash;
char *phword;
{
int blknum;
int nwords;
int nseg;
int i;
int c;
int offset;
char *blkptr;
*phword = '\0';
blknum = (int) (hash >> WORDFLD);
/* if the block number is greater than the number of */
/* blocks in the lexicon, the hash value is illegal */
if (blknum >= Tlexnindex)
return (ERROR);
Tsccurblk = blknum;
/* find out the number of words that need decompression */
nwords = (int) (hash & WORDBITS);
/* a count of zero means the block's index word itself */
if (nwords == 0)
{
strecpy(phword, Tlexindextab[blknum]);
return (TRUE);
}
/* read the block indicated by offset */
if ((Tlexblk = memread(blknum + Tlexoffset, Tlexfile, MM_READ | MM_PR3)) ==
NULL)
return (ERROR);
/* find the segment where word is likely to be found */
nseg = nwords / Tlexsegwords;
if (nseg > BLOCKSEGS)
nseg = BLOCKSEGS;
blkptr = Tlexblk;
if (nseg != 0)
{
/* walk the bytes at the start of the block, one per segment skipped; */
/* each non-zero byte c advances offset by c + Tlexsegwords and */
/* accounts for Tlexsegwords words.  A zero byte stops the walk early. */
/* NOTE(review): presumably these leading bytes are per-segment sizes */
/* stored relative to Tlexsegwords - confirm against the lexicon format. */
/* NOTE(review): if the very first byte is zero, offset stays 0 and */
/* decompression starts at Tlexblk itself - verify this cannot happen. */
offset = 0;
for (i = 0; i < nseg && (c = ctoi(*blkptr++)) != 0; i++)
{
offset += c;
offset += Tlexsegwords;
nwords -= Tlexsegwords;
}
Tlexblkptr = Tlexblk + offset;
/* start with an empty decompression buffer */
Tscendptr = Tscdecomp;
}
else
{
/* word is in the first segment: seed the decompression buffer with */
/* the index word and start right after the first BLOCKSEGS bytes */
Tscendptr = strecpy(Tscdecomp,Tlexindextab[blknum]);
Tlexblkptr = Tlexblk + BLOCKSEGS;
}
Tlexphon();
/* use sequential lexget mode to get word after nwords */
while (--nwords >= 0)
if (!Tlexbinnext())
return (ERROR);
strecpy(phword, Tscdecomp);
return (TRUE);
}
/*
 * Convert a phonetically encoded word into an ascii word.
 *
 * inword  - phonetically encoded input; it is not modified.
 * outword - receives the result.
 * flags   - passed through unchanged to undoflags().
 *
 * The encoded word is copied into a scratch buffer, cleaned with remphon(),
 * copied to outword, and finally handed to undoflags() together with flags
 * and outword.  No value is returned.
 */
lextoasc(inword, outword, flags)
char *inword;
char *outword;
int flags;
{
    char plain[LONGWORD];

    if (*inword != '\0')
    {
        strecpy(plain, inword);
        remphon(plain);
        strecpy(outword, plain);
        undoflags(plain, flags, outword);
    }
    else
    {
        /* an empty word is handled here because remphon() skips the */
        /* leading phonetic bin characters and would step over the */
        /* terminating null */
        *outword = '\0';
    }
}
/* get next nbits from the input stream */
#define NBITS 8
long
thesbit(ioptr, nbits)
HANDLE ioptr;
int nbits;
{
int scratch;
long val = 0L;
static char bits[] = { 0,0x01,0x03,0x07,0x0F,0x1F,0x3F,0x7F,0xFF };
while (nbits)
{
if (!Bitsleft)
{
if (Bufp > Bufend)
thesblk(ioptr,0L,0);
Curbyte = ctoi(*Bufp++);
Bitsleft = NBITS;
}
if (nbits <= Bitsleft)
{
scratch = Curbyte & bits[Bitsleft];
Bitsleft -= nbits;
return (val << nbits | (long) (scratch >> Bitsleft));
}
val = val << Bitsleft | (long) (Curbyte & bits[Bitsleft]);
nbits -= Bitsleft;
Bitsleft = 0;
}
return (0L);
}
/*
 * Skip the next "numbits" bits in the input stream.
 *
 * fptr    - handle passed on to thesblk() when the skip leaves the
 *           current block.
 * numbits - number of bits to skip.
 *
 * If the skip ends inside the current byte only Bitsleft is reduced.
 * Otherwise the rest of the current byte is dropped, whole bytes are
 * skipped (re-positioning with thesblk(..., 1) when they run past the
 * buffered block), and the byte in which the skip ends is loaded into
 * Curbyte.  The result of thesblk() is not checked.  No value is returned.
 */
thesskip(fptr, numbits)
HANDLE fptr;
int numbits;
{
    long nbytes;
    int avail;

    if (numbits < Bitsleft)
    {
        /* the skip stays within the byte already loaded */
        Bitsleft -= numbits;
    }
    else
    {
        /* discard what remains of the current byte */
        numbits -= Bitsleft;
        Bitsleft = 0;

        /* split the remainder into whole bytes and a tail of 0..7 bits */
        nbytes = numbits >> 3;
        numbits &= 0x07;

        avail = Bufend - Bufp + 1;
        if (avail > nbytes)
        {
            Bufp += nbytes;
        }
        else
        {
            /* step over the rest of this block and any whole blocks, */
            /* then re-position at the resulting stream address */
            nbytes -= avail;
            Blknum += 1;
            while (nbytes >= MM_BLKSIZE)
            {
                Blknum += 1;
                nbytes -= MM_BLKSIZE;
            }
            thesblk(fptr, nbytes + ((long) Blknum << 10), 1);
        }

        /* load the byte the skip ends in and mark the tail as consumed */
        Curbyte = ctoi(*Bufp++);
        Bitsleft = NBITS - numbits;
    }
}
/*
 * Read a block into memory and initialize the buffer pointers.
 *
 * fptr    - handle passed to memread().
 * blkaddr - stream address, used only when init is non-zero: the block
 *           number is blkaddr >> 10 and the starting position inside the
 *           block is blkaddr & 0x3FF.
 * init    - non-zero to position at blkaddr (this also clears Bitsleft);
 *           zero to move on to the start of the block after Blknum.
 *
 * returns - OKAY on success; ERROR if memread() returns NULL, in which
 *           case Memblk is NULL and Bufp/Bufend keep their old values.
 */
thesblk(fptr, blkaddr, init)
HANDLE fptr;
long blkaddr;
int init;
{
    int start = 0;

    if (!init)
    {
        Blknum += 1;
    }
    else
    {
        Bitsleft = 0;
        Blknum = (int) (blkaddr >> 10);
        start = blkaddr & 0x3FF;
    }

    Memblk = memread(Blknum, fptr, MM_READ | MM_PR3);
    if (Memblk == NULL)
        return (ERROR);

    Bufend = Memblk + MM_BLKSIZE - 1;
    Bufp = Memblk + start;
    return (OKAY);
}
/* unflection word validation routine */
/* str - candidate word; it is passed unchanged to Tlexword() */
/* returns whatever Tlexword(str) returns */
/* NOTE(review): presumably non-zero means the word is in the lexicon - */
/* confirm against Tlexword's definition */
unftry(str)
char *str;
{
return(Tlexword(str));
}
/*
 * Strip the phonetic bin characters, phonetic characters, and padding
 * from phstr.  Overwrite phstr with the result.
 *
 * phstr - phonetically encoded, null-terminated word; modified in place.
 *
 * The first two characters are skipped.  Every following character is
 * mapped through Engphtab[ch][0]:
 *   - a value accepted by scvalid() is kept;
 *   - E_REPEAT emits the most recent alphabetic character again (and is
 *     ignored when no letter has been kept yet);
 *   - E_LRSEP replaces phstr with the text that follows the separator
 *     and ends the conversion;
 *   - anything else is dropped.
 *
 * A string shorter than the two skipped characters yields an empty
 * result instead of being read past its terminator.  No value is returned.
 *
 * NOTE(review): *phptr is a plain char used as a table index; if encoded
 * characters can exceed 127 on a signed-char target it should go through
 * ctoi() like the other byte reads in this file - confirm.
 */
static remphon(phstr)
char *phstr;
{
    char *clnptr;
    char *phptr;
    int phonchr;
    int prevchr = '\0';     /* no letter kept so far */

    clnptr = phstr;

    /* only scan when both leading characters are actually present */
    if (phstr[0] != '\0' && phstr[1] != '\0')
    {
        phptr = phstr + 2;
        while (*phptr)
        {
            phonchr = Engphtab[*phptr++][0];
            if (scvalid(phonchr))
            {
                *clnptr++ = phonchr;
                if (isalpha(phonchr))
                    prevchr = phonchr;
            }
            else if (phonchr == E_REPEAT)
            {
                /* nothing to repeat before the first letter */
                if (prevchr != '\0')
                    *clnptr++ = prevchr;
            }
            else if (phonchr == E_LRSEP)
            {
                /* the remainder after the separator becomes the result */
                strecpy(phstr, phptr);
                return;
            }
        }
    }
    *clnptr = '\0';
}
/*
 * Convert count byte pairs from source into shorts stored at dest.
 *
 * source - input bytes; two are consumed per value, low-order byte first.
 * dest   - receives count values.
 * count  - number of values to convert; nothing is done when it is
 *          zero or negative.
 *
 * Each byte goes through ctoi() before the two are combined as
 * low + (high << 8).  No value is returned.
 */
thintmv(source, dest, count)
char *source;
short *dest;
int count;
{
    int lo;
    int hi;

    for (; count > 0; count--)
    {
        lo = ctoi(*source++);
        hi = ctoi(*source++);
        *dest++ = lo + (hi << 8);
    }
}