mirror of
https://github.com/antoinevignau/source.git
synced 2025-02-08 19:30:33 +00:00
1 line
7.1 KiB
C
Executable File
1 line
7.1 KiB
C
Executable File
/***********************************************************************\
|
|
|
|
Filename: clxread.c
|
|
|
|
\***********************************************************************/
|
|
|
|
#include <memory.h>
|
|
#include <types.h>
|
|
#include <string.h>
|
|
#include "proxio.h"
|
|
#include "clxdef.h"
|
|
#include "clxtypo.h"
|
|
#include "sp.h"
|
|
#include "spdef.h"
|
|
|
|
int Clxflag; /* the search result flag; clxword() clears it before a lookup and returns it afterwards */
|
|
char *Clxsword; /* the search word; set by clxword() and clxtypo() before they scan bins */
|
|
|
|
/* the Corelex
   clxalloc() reads the file header directly into memory: CLXCHARS bytes
   starting at &Clxversion and 2 * CLXSHORTS bytes starting at &Clxbwords.
   NOTE(review): that presumably relies on the variables below being laid
   out contiguously in declaration order -- confirm before reordering,
   retyping or inserting anything in this group. */
|
|
|
|
char Clxversion; /* the Corelex version code */
|
|
char Clxenv; /* the language code for the Corelex */
|
|
short Clxbwords; /* words per bin */
|
|
USHORT Clxnlenbin; /* number of cx_lenbin's; clxalloc() reads 2 * Clxnlenbin bytes into Clxlenbin */
|
|
USHORT Clxnflags; /* number of cx_flags's; clxalloc() reads this many bytes into Clxflags */
|
|
USHORT Clxnlettab; /* number of cx_lettab's; clxalloc() reads this many bytes into Clxlettab */
|
|
USHORT Clxbins; /* number of cx_bintab's; clxalloc() reads 2 * (Clxbins + 1) bytes into Clxbintab */
|
|
USHORT Clxndata; /* number of cx_data's; used as the byte size of the Clxdata handle */
|
|
|
|
USHORT *Clxlenbin; /* bin number of length partitions: [n] is the first bin for n-letter words, [n + 1] is one past the last */
|
|
char *Clxflags; /* flags in the Corelex; up to 15 possible */
|
|
char *Clxlettab; /* freq sorted, letters used in the list; indexed by the value decoded from the nibble stream */
|
|
USHORT *Clxbintab; /* table of byte offsets to Corelex bins, relative to *Clxdata */
|
|
char **Clxdata; /* Handle to compressed wordlist (4-bit "nibbles") */
|
|
char theclxword[MAXCORWORD+1]; /* one-word buffer; not referenced by the C code in this file -- presumably used by the assembly routines, TODO confirm */
|
|
|
|
extern pascal int clxgetbin(); /* bin decompressor (coded in assembly); called here as clxgetbin(func, bin, len) */
|
|
|
|
/* Load the largest corelex available */
|
|
|
|
static char *sizes[] = { "32", "6", "4", "2" };
|
|
|
|
getacorelex()
|
|
{
|
|
unsigned x;
|
|
char *dend;
|
|
|
|
dend = strchr(_SPdir, 0);
|
|
for (x = 0; x < 4; ++x)
|
|
{
|
|
strcpy(dend, "eng");
|
|
strcat(dend, sizes[x]);
|
|
strcat(dend, ".clx");
|
|
if (clxalloc(_SPdir))
|
|
{
|
|
*dend = 0;
|
|
return(1);
|
|
}
|
|
}
|
|
*dend = 0;
|
|
return(0);
|
|
}
|
|
|
|
|
|
/* Read a Corelex into memory. */
|
|
|
|
clxalloc(name)
|
|
char *name; /* the file to open */
|
|
{
|
|
extern VOID clxfree();
|
|
HANDLE file; /* the file to read from */
|
|
|
|
if ((file = stdopen(name, H_RDONLY)) == H_ERROR)
|
|
return (FALSE);
|
|
|
|
/* read in the Corelex header info */
|
|
|
|
if (bytread((char *) &Clxversion, CLXCHARS, file) == ERROR
|
|
|| bytread((char *) &Clxbwords, 2 * CLXSHORTS, file) == ERROR)
|
|
{
|
|
stdclose(file);
|
|
return (FALSE);
|
|
}
|
|
|
|
/* read the various arrays */
|
|
|
|
Clxdata = NewHandle((long) Clxndata, _SPID, 0x0110, 0L);
|
|
if ((!Clxdata) ||
|
|
abytread(&Clxlettab, Clxnlettab, file) == ERROR ||
|
|
bytread(*Clxdata, Clxndata, file) == ERROR ||
|
|
abytread(&Clxbintab, 2 * (Clxbins + 1), file) == ERROR ||
|
|
abytread(&Clxlenbin, 2 * Clxnlenbin, file) == ERROR ||
|
|
abytread(&Clxflags, Clxnflags, file) == ERROR)
|
|
{
|
|
stdclose(file);
|
|
return (FALSE);
|
|
}
|
|
stdclose(file);
|
|
|
|
return (TRUE);
|
|
}
|
|
|
|
/* Free the Corelex. */
|
|
|
|
VOID
|
|
clxfree()
|
|
{
|
|
nzfree(Clxlenbin);
|
|
nzfree(Clxflags);
|
|
nzfree(Clxlettab);
|
|
nzfree(Clxbintab);
|
|
if (*Clxdata)
|
|
DisposeHandle(Clxdata);
|
|
}
|
|
|
|
/* Look for word in a Corelex. If the word is found, return its flag byte,
|
|
otherwise return 0. */
|
|
|
|
extern pascal int clxffunc();
|
|
|
|
clxword(word)
|
|
char *word; /* the word to search for */
|
|
{
|
|
|
|
int qlen; /* length of query word */
|
|
int sbin; /* starting bin */
|
|
int bin; /* actual bin to search */
|
|
int ebin; /* ending bin */
|
|
|
|
/* check length of query */
|
|
|
|
if ((qlen = strlen((char *) word)) >= MAXCORWORD)
|
|
return (CLX_NOTFOUND);
|
|
|
|
/* look for the bin that the word is in. */
|
|
|
|
Clxsword = word;
|
|
Clxflag = 0;
|
|
sbin = Clxlenbin[qlen];
|
|
ebin = Clxlenbin[qlen + 1] - 1;
|
|
if ((bin = clxbinfind(word, sbin, ebin, qlen)) < sbin)
|
|
return (CLX_NOTFOUND);
|
|
|
|
/* decompress the bin and find the word, if there */
|
|
|
|
clxgetbin(clxffunc, bin, qlen);
|
|
return (Clxflag);
|
|
}
|
|
|
|
/* GETNIB gets a byte from the Corelex at cdptr and returns the high or low
|
|
nibble depending on the gn flag; this could be done with a function, but
|
|
is done as a macro for efficiency. It depends on specific variable names
|
|
in the functions, so these should be changed with care. */
|
|
|
|
#define GETNIB() ((gn = ~gn) ? (bt = ctoi(*cdptr++)) >> 4 : bt & 0x0F)
|
|
|
|
/* Determine word's proper bin in the Corelex by binary-searching the first
|
|
word of the bins. Return the actual bin number. */
|
|
|
|
clxbinfind(word0, minval, maxval, len)
|
|
char *word0; /* placeholder at beginning of word */
|
|
int minval; /* lower limit on bin number */
|
|
int maxval; /* upper limit on bin number */
|
|
int len; /* length of words in the bin */
|
|
{
|
|
char *word; /* actual word scanner */
|
|
int frindex; /* index into letter table */
|
|
char *cdptr; /* pointer into the bin */
|
|
int gn; /* nibble-getter flag */
|
|
int bt; /* holds a byte read from cx_data */
|
|
int bin; /* bin number to check */
|
|
int cnt; /* length of a delta */
|
|
int tmp;
|
|
|
|
while (minval < maxval)
|
|
{
|
|
bin = (minval + maxval + 1) >> 1;
|
|
|
|
/* cx_bintab[bin] is byte offset into Corelex array
|
|
(cx_data[]) of start of next bin to be checked */
|
|
|
|
cdptr = *Clxdata + Clxbintab[bin];
|
|
|
|
/* decompress the first word in the bin, comparing it with
|
|
the input word to see if the input word is in the bin */
|
|
|
|
gn = FALSE;
|
|
word = word0;
|
|
cnt = len;
|
|
while (--cnt >= 0)
|
|
{
|
|
/* read nibbles to form the character index; as in coreword(),
|
|
sequence of hex-F's can be arbitrarily long */
|
|
|
|
frindex = 0;
|
|
do
|
|
frindex += (tmp = GETNIB());
|
|
while (tmp == 0x0F);
|
|
|
|
/* Compare the word and a letter from the Corelex */
|
|
|
|
tmp = ctoi(*word++) - ctoi(Clxlettab[frindex]);
|
|
if (tmp)
|
|
break;
|
|
}
|
|
if (tmp >= 0)
|
|
minval = bin;
|
|
else
|
|
maxval = bin - 1;
|
|
}
|
|
return (maxval);
|
|
}
|
|
|
|
/* decompress a bin and call a function for each word in the bin */
|
|
|
|
/* Coded in assembly (9/23/87 RAH) */
|
|
|
|
/* Perform typo correct on a word. Check the Corelex which is one deletion,
|
|
insertion, substitution or transposition away from 'word'. Call 'func'
|
|
for each word found. */
|
|
|
|
unsigned doingins; /* flag to indicate insert or omit */
|
|
|
|
extern int typosub();
|
|
extern int typoins();
|
|
|
|
VOID
|
|
clxtypo(word)
|
|
char *word; /* the word to check against the Corelex */
|
|
{
|
|
unsigned len; /* length of bins being checked */
|
|
unsigned bin; /* bin number being checked */
|
|
unsigned cnt; /* counter for length bins */
|
|
unsigned ebin; /* end of the current bin */
|
|
|
|
int (*(functbl[3]))();
|
|
|
|
functbl[0] = functbl[2] = typoins;
|
|
functbl[1] = typosub;
|
|
|
|
/* initialize static storage for the called routines */
|
|
|
|
Clxsword = word;
|
|
|
|
/* check for each type of typo error */
|
|
|
|
len = strlen(word) + 2;
|
|
cnt = 3;
|
|
doingins = 0;
|
|
while (cnt--)
|
|
{
|
|
/* Check for a valid length */
|
|
|
|
if (!--len)
|
|
continue;
|
|
if (len >= MAXCORWORD)
|
|
continue;
|
|
|
|
/* scan each bin in the length partition and check each
|
|
word. */
|
|
|
|
bin = Clxlenbin[len];
|
|
ebin = Clxlenbin[len + 1];
|
|
while (bin < ebin)
|
|
clxgetbin(functbl[cnt], bin++, len);
|
|
|
|
++doingins;
|
|
}
|
|
}
|