mirror of
https://github.com/antoinevignau/source.git
synced 2025-01-04 04:31:04 +00:00
1 line
6.9 KiB
C
Executable File
1 line
6.9 KiB
C
Executable File
/***********************************************************************\
|
|
|
|
Filename: thesutil.c
|
|
|
|
\***********************************************************************/
|
|
|
|
extern pascal int Tlexcmp();
|
|
extern pascal void Tlexphon();
|
|
|
|
/*#include <ctype.h>*/
|
|
#include "proxio.h"
|
|
#include "tlex.h"
|
|
#include "spmemory.h"
|
|
#include "scdef.h"
|
|
#include "tenviron.h"
|
|
#include "thesmisc.h"
|
|
|
|
static remphon();
|
|
|
|
char *memread();
|
|
|
|
/* Bit-stream reader state shared by thesbit(), thesskip() and thesblk(). */

static char *Memblk;            /* block most recently returned by memread() */
static char *Bufp;              /* next byte of Memblk to be consumed */
static char *Bufend;            /* Memblk + MM_BLKSIZE - 1 */
static int Blknum = 0;          /* block number last passed to memread() */
static int Curbyte;             /* byte the bit reader is working through */
static int Bitsleft = 0;        /* bits of Curbyte not yet consumed */
|
|
|
|
/* return the thesaurus hash value of a word */
|
|
|
|
long theswtoh(word)
|
|
char *word;
|
|
{
|
|
char *lexword = Tscdecomp;
|
|
int wcount;
|
|
int blknum;
|
|
long hash;
|
|
char peword[MAXPHWORD];
|
|
|
|
char shit[5];
|
|
|
|
/* phonetically encode word */
|
|
|
|
if (phfull(word, peword, Tlexprod & SHORTLEX) == ERROR)
|
|
return (ERROR);
|
|
|
|
/* compute block to search */
|
|
|
|
Tlexblknum(peword);
|
|
blknum = Tsccurblk;
|
|
|
|
/* if seed word matches the query word, simply return the hash */
|
|
/* of the block */
|
|
|
|
hash = (long) blknum << WORDFLD;
|
|
if (Tlexcmp(peword, Tlexindextab[blknum]) == 0)
|
|
return (hash);
|
|
|
|
/* read block into memory */
|
|
|
|
if ((Tlexblk = memread(blknum + Tlexoffset, Tlexfile, MM_READ | MM_PR3)) ==
|
|
NULL)
|
|
return (ERROR);
|
|
|
|
/* prepare for sequential decompression of block */
|
|
|
|
wcount = 0;
|
|
Tlexblkptr = Tlexblk + BLOCKSEGS;
|
|
Tscendptr = strecpy(lexword, Tlexindextab[blknum]);
|
|
|
|
/* decompress until end of block or word found */
|
|
|
|
while (*Tlexblkptr)
|
|
{
|
|
Tlexbinnext();
|
|
++wcount;
|
|
if (!Tlexcmp(peword, Tscdecomp))
|
|
return ((long) wcount + hash);
|
|
}
|
|
return (ERROR);
|
|
}
|
|
|
|
/* given a hash value, return the corresponding word from the lexicon */
|
|
|
|
theshash(hash, phword)
|
|
long hash;
|
|
char *phword;
|
|
{
|
|
int blknum;
|
|
int nwords;
|
|
int nseg;
|
|
int i;
|
|
int c;
|
|
int offset;
|
|
char *blkptr;
|
|
|
|
*phword = '\0';
|
|
blknum = (int) (hash >> WORDFLD);
|
|
|
|
/* if the block number is greater than the number of */
|
|
/* blocks in the lexicon, the hash value is illegal */
|
|
|
|
if (blknum >= Tlexnindex)
|
|
return (ERROR);
|
|
Tsccurblk = blknum;
|
|
|
|
/* find out the number of words that need decompression */
|
|
|
|
nwords = (int) (hash & WORDBITS);
|
|
|
|
if (nwords == 0)
|
|
{
|
|
strecpy(phword, Tlexindextab[blknum]);
|
|
return (TRUE);
|
|
}
|
|
|
|
/* read the block indicated by offset */
|
|
|
|
if ((Tlexblk = memread(blknum + Tlexoffset, Tlexfile, MM_READ | MM_PR3)) ==
|
|
NULL)
|
|
return (ERROR);
|
|
|
|
/* find the segment where word is likely to be found */
|
|
|
|
nseg = nwords / Tlexsegwords;
|
|
if (nseg > BLOCKSEGS)
|
|
nseg = BLOCKSEGS;
|
|
blkptr = Tlexblk;
|
|
|
|
if (nseg != 0)
|
|
{
|
|
offset = 0;
|
|
for (i = 0; i < nseg && (c = ctoi(*blkptr++)) != 0; i++)
|
|
{
|
|
offset += c;
|
|
offset += Tlexsegwords;
|
|
nwords -= Tlexsegwords;
|
|
}
|
|
Tlexblkptr = Tlexblk + offset;
|
|
Tscendptr = Tscdecomp;
|
|
}
|
|
else
|
|
{
|
|
Tscendptr = strecpy(Tscdecomp,Tlexindextab[blknum]);
|
|
Tlexblkptr = Tlexblk + BLOCKSEGS;
|
|
}
|
|
Tlexphon();
|
|
|
|
/* use sequential lexget mode to get word after nwords */
|
|
|
|
while (--nwords >= 0)
|
|
if (!Tlexbinnext())
|
|
return (ERROR);
|
|
strecpy(phword, Tscdecomp);
|
|
return (TRUE);
|
|
}
|
|
|
|
/* convert a phonetically encoded word into an ascii word */
|
|
|
|
lextoasc(inword, outword, flags)
|
|
char *inword;
|
|
char *outword;
|
|
int flags;
|
|
{
|
|
char ascii[LONGWORD];
|
|
|
|
/* special check for null word, since skipping phonetic bin */
|
|
/* characters would automatically skip over an initial null */
|
|
|
|
if (*inword == '\0')
|
|
{
|
|
*outword = '\0';
|
|
return;
|
|
}
|
|
|
|
strecpy(ascii, inword);
|
|
remphon(ascii);
|
|
strecpy(outword, ascii);
|
|
undoflags(ascii, flags, outword);
|
|
}
|
|
|
|
/* get next nbits from the input stream */
|
|
|
|
#define NBITS 8
|
|
|
|
long
|
|
thesbit(ioptr, nbits)
|
|
HANDLE ioptr;
|
|
int nbits;
|
|
{
|
|
int scratch;
|
|
long val = 0L;
|
|
static char bits[] = { 0,0x01,0x03,0x07,0x0F,0x1F,0x3F,0x7F,0xFF };
|
|
|
|
while (nbits)
|
|
{
|
|
if (!Bitsleft)
|
|
{
|
|
if (Bufp > Bufend)
|
|
thesblk(ioptr,0L,0);
|
|
Curbyte = ctoi(*Bufp++);
|
|
Bitsleft = NBITS;
|
|
}
|
|
if (nbits <= Bitsleft)
|
|
{
|
|
scratch = Curbyte & bits[Bitsleft];
|
|
Bitsleft -= nbits;
|
|
return (val << nbits | (long) (scratch >> Bitsleft));
|
|
}
|
|
val = val << Bitsleft | (long) (Curbyte & bits[Bitsleft]);
|
|
nbits -= Bitsleft;
|
|
Bitsleft = 0;
|
|
}
|
|
|
|
return (0L);
|
|
}
|
|
|
|
/* skip the next "numbits" bits in the input stream. */
|
|
|
|
thesskip(fptr, numbits)
|
|
HANDLE fptr;
|
|
int numbits;
|
|
{
|
|
long skipbytes;
|
|
int bufleft;
|
|
|
|
if (numbits >= Bitsleft)
|
|
{
|
|
numbits -= Bitsleft;
|
|
Bitsleft = 0;
|
|
skipbytes = numbits >> 3;
|
|
numbits &= 0x07;
|
|
if ((bufleft = Bufend - Bufp + 1) > skipbytes)
|
|
Bufp += skipbytes;
|
|
else
|
|
{
|
|
skipbytes -= bufleft;
|
|
Blknum++;
|
|
while (skipbytes >= MM_BLKSIZE)
|
|
{
|
|
skipbytes -= MM_BLKSIZE;
|
|
Blknum++;
|
|
}
|
|
skipbytes += (long) Blknum << 10;
|
|
thesblk(fptr, skipbytes, 1);
|
|
}
|
|
Curbyte = ctoi(*Bufp++);
|
|
Bitsleft = NBITS - numbits;
|
|
}
|
|
else
|
|
Bitsleft -= numbits;
|
|
}
|
|
|
|
/* read a block into memory and initialize buffer pointers. */
|
|
|
|
thesblk(fptr, blkaddr, init)
|
|
HANDLE fptr;
|
|
long blkaddr;
|
|
int init;
|
|
{
|
|
int bufaddr;
|
|
|
|
if (init)
|
|
{
|
|
Bitsleft = 0;
|
|
Blknum = (int) (blkaddr >> 10);
|
|
bufaddr = blkaddr & 0x3FF;
|
|
}
|
|
else
|
|
{
|
|
Blknum++;
|
|
bufaddr = 0;
|
|
}
|
|
|
|
if ((Memblk = memread(Blknum, fptr, MM_READ | MM_PR3)) == NULL)
|
|
return (ERROR);
|
|
Bufp = Memblk + bufaddr;
|
|
Bufend = Memblk + MM_BLKSIZE - 1;
|
|
return (OKAY);
|
|
}
|
|
|
|
/*
 * Unflection word validation routine: passes str to Tlexword() and
 * returns its result.
 */
unftry(str)
char *str;
{
    int result;

    result = Tlexword(str);
    return (result);
}
|
|
|
|
/* Strip the phonetic bin characters, phonetic characters,
|
|
and padding from phptr. Overwrite phstr with the result. */
|
|
|
|
static remphon(phstr)
|
|
char *phstr;
|
|
{
|
|
char *clnptr;
|
|
char *phptr;
|
|
int phonchr;
|
|
int prevchr;
|
|
|
|
clnptr = phptr = phstr;
|
|
phptr += 2;
|
|
while (*phptr)
|
|
{
|
|
phonchr = Engphtab[*phptr++][0];
|
|
if (scvalid(phonchr))
|
|
{
|
|
*clnptr++ = phonchr;
|
|
if (isalpha(phonchr))
|
|
prevchr = phonchr;
|
|
}
|
|
else if (phonchr == E_REPEAT)
|
|
*clnptr++ = prevchr;
|
|
else if (phonchr == E_LRSEP)
|
|
{
|
|
strecpy(phstr, phptr);
|
|
return;
|
|
}
|
|
}
|
|
*clnptr = 0;
|
|
}
|
|
|
|
/*
 * Build count shorts at dest from 2 * count bytes at source.  Each pair
 * of bytes is read through ctoi(); the first byte supplies the low eight
 * bits and the second is shifted left by eight.  A count of zero or less
 * copies nothing.  No meaningful value is returned.
 */
thintmv(source, dest, count)
char *source;
short *dest;
int count;
{
    int lo;
    int hi;

    for (; count > 0; count--)
    {
        lo = ctoi(source[0]);
        hi = ctoi(source[1]);
        source += 2;
        *dest++ = lo + (hi << 8);
    }
}
|