antoine-source/appleworksgs/Spell/Src/CLX.C
2023-03-04 03:45:20 +01:00

1 line
7.1 KiB
C
Executable File

/***********************************************************************\
Filename: clxread.c
\***********************************************************************/
#include <memory.h>
#include <types.h>
#include <string.h>
#include "proxio.h"
#include "clxdef.h"
#include "clxtypo.h"
#include "sp.h"
#include "spdef.h"
/* Search state shared with the per-word callbacks handed to clxgetbin(). */
int Clxflag; /* the search result flag; clxword() clears it before a search and returns it afterwards */
char *Clxsword; /* the search word; set by clxword() and clxtypo() before scanning bins */
/* the Corelex */
/* NOTE(review): clxalloc() reads the file header straight into memory
   starting at &Clxversion (CLXCHARS bytes) and at &Clxbwords
   (2 * CLXSHORTS bytes), so it presumably relies on the variables below
   being laid out contiguously in exactly this order -- confirm before
   reordering, inserting or retyping any of them. */
char Clxversion; /* the Corelex version code */
char Clxenv; /* the language code for the Corelex */
short Clxbwords; /* words per bin */
USHORT Clxnlenbin; /* number of cx_lenbin's */
USHORT Clxnflags; /* number of cx_flags's */
USHORT Clxnlettab; /* number of cx_lettab's */
USHORT Clxbins; /* number of cx_bintab's; clxalloc() reads Clxbins + 1 entries */
USHORT Clxndata; /* number of cx_data's; also the size requested for the Clxdata handle */
USHORT *Clxlenbin; /* bin number of length partitions; entries [len] and [len + 1] bound the bins for words of length len */
char *Clxflags; /* flags in the Corelex; up to 15 possible */
char *Clxlettab; /* freq sorted, letters used in the list; indexed by the decoded nibble index */
USHORT *Clxbintab; /* table of byte offsets to Corelex bins; indexed by bin number, offset is relative to *Clxdata */
char **Clxdata; /* Handle to compressed wordlist (4-bit "nibbles") */
char theclxword[MAXCORWORD+1]; /* not referenced in this file; presumably a scratch word buffer for code elsewhere -- TODO confirm */
/* Called in this file as clxgetbin(func, bin, len); defined outside this file. */
extern pascal int clxgetbin();
/* Load the largest corelex available.

   Candidate file names are built in place at the end of _SPdir as
   "eng" + size + ".clx", trying each entry of sizes[] in order and stopping
   at the first one clxalloc() accepts.  _SPdir is always cut back to its
   original length before returning.

   Returns 1 if a Corelex was loaded, 0 if every candidate failed. */
static char *sizes[] = { "32", "6", "4", "2" };
getacorelex()
{
    char *tail;         /* where the directory string ends and the file name goes */
    unsigned idx;       /* which size suffix is being tried */
    int loaded;         /* becomes 1 once clxalloc() succeeds */

    tail = strchr(_SPdir, 0);
    loaded = 0;
    idx = 0;
    while (!loaded && idx < 4)
    {
        strcpy(tail, "eng");
        strcat(tail, sizes[idx]);
        strcat(tail, ".clx");
        if (clxalloc(_SPdir))
            loaded = 1;
        ++idx;
    }

    /* restore _SPdir to the bare directory */
    *tail = 0;
    return (loaded);
}
/* Read a Corelex into memory.

   Opens `name' read-only, reads the header into the Clx* header globals,
   then reads the letter table, the compressed word data, the bin offset
   table, the length-partition table and the flag table, in that order.

   Returns TRUE on success, FALSE if the file cannot be opened or any read
   or allocation fails.  The file is closed on every path. */
clxalloc(name)
char *name; /* the file to open */
{
    extern VOID clxfree();
    HANDLE file; /* the file to read from */

    if ((file = stdopen(name, H_RDONLY)) == H_ERROR)
        return (FALSE);

    /* read in the Corelex header info; the two reads fill the header
       globals in place, starting at Clxversion and at Clxbwords */
    if (bytread((char *) &Clxversion, CLXCHARS, file) == ERROR
        || bytread((char *) &Clxbwords, 2 * CLXSHORTS, file) == ERROR)
    {
        stdclose(file);
        return (FALSE);
    }

    /* read the various arrays; the order of the reads follows the order
       of the sections in the file and must not be changed */
    Clxdata = NewHandle((long) Clxndata, _SPID, 0x0110, 0L);
    if ((!Clxdata) ||
        abytread(&Clxlettab, Clxnlettab, file) == ERROR ||
        bytread(*Clxdata, Clxndata, file) == ERROR ||
        abytread(&Clxbintab, 2 * (Clxbins + 1), file) == ERROR ||
        abytread(&Clxlenbin, 2 * Clxnlenbin, file) == ERROR ||
        abytread(&Clxflags, Clxnflags, file) == ERROR)
    {
        /* Release the word-data handle allocated above so a failed load
           does not leak it (getacorelex() retries with other files and
           would overwrite Clxdata).  It is cleared so that nothing can
           dispose of it a second time. */
        if (Clxdata)
        {
            DisposeHandle(Clxdata);
            Clxdata = 0;
        }
        /* NOTE(review): tables already obtained through abytread() before
           the failing read are not released here; doing that safely
           depends on what abytread() leaves in the pointer when it fails
           -- confirm and free them if appropriate. */
        stdclose(file);
        return (FALSE);
    }

    stdclose(file);
    return (TRUE);
}
/* Free the Corelex.

   Releases the four tables with nzfree() and disposes of the word-data
   handle.  Every pointer is cleared afterwards, so calling clxfree() again
   (or before any Corelex was loaded) does not release anything twice. */
VOID
clxfree()
{
    nzfree(Clxlenbin);
    Clxlenbin = 0;
    nzfree(Clxflags);
    Clxflags = 0;
    nzfree(Clxlettab);
    Clxlettab = 0;
    nzfree(Clxbintab);
    Clxbintab = 0;

    /* Test the handle itself, not the pointer it refers to: dereferencing
       a NULL handle is invalid, and a handle must still be disposed of
       even when the pointer it holds is NULL. */
    if (Clxdata)
    {
        DisposeHandle(Clxdata);
        Clxdata = 0;
    }
}
/* Look for word in a Corelex.

   Words of MAXCORWORD characters or more, and words for which no candidate
   bin exists, yield CLX_NOTFOUND.  Otherwise the candidate bin is
   decompressed through clxgetbin() with clxffunc as the per-word callback
   and the value left in Clxflag is returned (it is cleared to 0 before the
   search, so it stays 0 unless the callback changes it). */
extern pascal int clxffunc();
clxword(word)
char *word; /* the word to search for */
{
    int wlen;   /* length of the query word */
    int first;  /* first bin holding words of this length */
    int last;   /* last bin holding words of this length */
    int hit;    /* bin selected by the binary search */

    /* check length of query */
    wlen = strlen((char *) word);
    if (wlen >= MAXCORWORD)
        return (CLX_NOTFOUND);

    /* publish the search state used by the callback */
    Clxsword = word;
    Clxflag = 0;

    /* look for the bin that the word is in */
    first = Clxlenbin[wlen];
    last = Clxlenbin[wlen + 1] - 1;
    hit = clxbinfind(word, first, last, wlen);
    if (hit < first)
        return (CLX_NOTFOUND);

    /* decompress the bin and find the word, if there */
    clxgetbin(clxffunc, hit, wlen);
    return (Clxflag);
}
/* GETNIB gets a byte from the Corelex at cdptr and returns the high or low
   nibble depending on the gn flag; this could be done with a function, but
   is done as a macro for efficiency. It depends on specific variable names
   in the functions (gn, bt, cdptr), so these should be changed with care.
   gn is complemented on every use: when it becomes non-zero a new byte is
   fetched into bt and its high nibble is returned, otherwise the low nibble
   of the byte already held in bt is returned. */
#define GETNIB() ((gn = ~gn) ? (bt = ctoi(*cdptr++)) >> 4 : bt & 0x0F)
/* Determine word's proper bin in the Corelex by binary-searching the first
   word of the bins. Return the actual bin number.

   Each probe decodes the first `len' letters stored at the start of a bin
   and compares them with the query.  The search keeps the highest bin whose
   first word does not compare greater than the query.  If maxval is already
   below minval on entry no probe is made and maxval is returned unchanged,
   which callers can detect as a result below minval. */
clxbinfind(word0, minval, maxval, len)
char *word0; /* placeholder at beginning of word */
int minval; /* lower limit on bin number */
int maxval; /* upper limit on bin number */
int len; /* length of words in the bin */
{
    char *word; /* actual word scanner */
    int frindex; /* index into letter table */
    char *cdptr; /* pointer into the bin */
    int gn; /* nibble-getter flag */
    int bt; /* holds a byte read from cx_data */
    int bin; /* bin number to check */
    int cnt; /* length of a delta */
    int tmp; /* last nibble read, then the letter difference */

    while (minval < maxval)
    {
        /* round up so that the probe is always above minval */
        bin = (minval + maxval + 1) >> 1;

        /* cx_bintab[bin] is byte offset into Corelex array
           (cx_data[]) of start of next bin to be checked */
        cdptr = *Clxdata + Clxbintab[bin];

        /* decompress the first word in the bin, comparing it with
           the input word to see if the input word is in the bin */
        gn = FALSE;
        word = word0;
        cnt = len;

        /* With len == 0 the loop below never runs; start from "equal" so
           the test after it does not read an unset value. */
        tmp = 0;
        while (--cnt >= 0)
        {
            /* read nibbles to form the character index; as in coreword(),
               sequence of hex-F's can be arbitrarily long */
            frindex = 0;
            do
                frindex += (tmp = GETNIB());
            while (tmp == 0x0F);

            /* Compare the word and a letter from the Corelex */
            tmp = ctoi(*word++) - ctoi(Clxlettab[frindex]);
            if (tmp)
                break;
        }

        /* query >= first word of this bin: the answer is this bin or a
           later one; otherwise it lies strictly before this bin */
        if (tmp >= 0)
            minval = bin;
        else
            maxval = bin - 1;
    }
    return (maxval);
}
/* decompress a bin and call a function for each word in the bin */
/* Coded in assembly (9/23/87 RAH) */
/* Perform typo correct on a word. Check the Corelex which is one deletion,
   insertion, substitution or transposition away from 'word'. Call 'func'
   for each word found.

   Three passes are made over the length partitions, in this order:
     pass 0: words one letter longer than 'word'  (typoins, doingins == 0)
     pass 1: words of the same length             (typosub, doingins == 1)
     pass 2: words one letter shorter than 'word' (typoins, doingins == 2)
   A pass whose length is 0 or not below MAXCORWORD scans nothing, but
   doingins still advances so the remaining passes keep their numbers. */
unsigned doingins; /* flag to indicate insert or omit; holds the pass number while a pass runs */
extern int typosub();
extern int typoins();
VOID
clxtypo(word)
char *word; /* the word to check against the Corelex */
{
    unsigned len; /* length of bins being checked */
    unsigned bin; /* bin number being checked */
    unsigned cnt; /* counter for length bins */
    unsigned ebin; /* end of the current bin */
    int (*(functbl[3]))();

    /* cnt counts down 2, 1, 0 inside the loop, so the table is indexed
       in reverse pass order */
    functbl[0] = functbl[2] = typoins;
    functbl[1] = typosub;

    /* initialize static storage for the called routines */
    Clxsword = word;

    /* check for each type of typo error */
    len = strlen(word) + 2;
    cnt = 3;
    doingins = 0;
    while (cnt--)
    {
        --len;

        /* Check for a valid length.  len is unsigned, so a wrap below
           zero also fails the upper-bound test. */
        if (len && len < MAXCORWORD)
        {
            /* scan each bin in the length partition and check each
               word. */
            bin = Clxlenbin[len];
            ebin = Clxlenbin[len + 1];
            while (bin < ebin)
                clxgetbin(functbl[cnt], bin++, len);
        }

        /* advance even when the pass was skipped; otherwise later passes
           would run with the wrong pass number */
        ++doingins;
    }
}