mirror of
https://github.com/antoinevignau/source.git
synced 2025-01-04 04:31:04 +00:00
1 line
5.6 KiB
C
Executable File
1 line
5.6 KiB
C
Executable File
/***********************************************************************\
|
|
|
|
Filename: environ.h
|
|
|
|
\***********************************************************************/
|
|
|
|
#ifndef SC_ENVIRON
|
|
#define SC_ENVIRON
|
|
|
|
#ifndef _PROXLIB
|
|
#include <proxlib.h>
|
|
#endif
|
|
|
|
#ifndef _SSEARCH
|
|
#include <ssearch.h>
|
|
#endif
|
|
|
|
#ifndef SC_SCDEF
|
|
#include "scdef.h"
|
|
#endif
|
|
|
|
/* These define the tags associated with lexicon words. */
|
|
|
|
#define IW_TAG 0x80 /* Bit 7: the bit that indicates a tag. */
|
|
#define IW_TAGMASK 0x7F /* The tag information mask (every bit except IW_TAG) */
|
|
#define IW_TAGACC 0x70 /* Mask for the accent number field (bits 4-6) */
|
|
#define IW_TAGINDX 0x0F /* Mask for the suffix number field (bits 0-3) */
|
|
|
|
/* These are the characters used in unencoded words and in the two character
|
|
phonetic words. */
|
|
|
|
#define E_HYTEMP 0x07 /* hyphen substitution character */
|
|
#define E_LRSEP 0x08 /* misspelling-pair separator */
|
|
#define E_REPEAT 0x09 /* "repeat last character" symbol */
|
|
#define E_FILL 0x0B /* pair-padding "nothing" phonetic value */
|
|
#define E_LOPH 0x10 /* lowest phonetic character */
|
|
#define E_LOGPH 0x11 /* lowest PF474 Group symbol */
|
|
#define E_HIGPH 0x18 /* highest PF474 Group symbol */
|
|
#define E_HIPH 0x19 /* highest (ascii value) phonetic character */
|
|
#define E_VOWEL 0x18 /* phonetic value of vowels (same value as E_HIGPH) */
|
|
#define E_TCBASE 0xC0 /* base of 64-char transition symbol set (0xC0..0xFF) */
|
|
|
|
/* These are special codes in the one character phonetic strings. */
|
|
|
|
#define E_LRIND 0x04 /* index of E_LRSEP in phonetic tables */
|
|
#define E_RPTIND 0x05 /* index shared by E_REPEAT and E_FILL in phonetic tables */
|
|
|
|
/* PHON defines the phonetic structure of each language.
|
|
|
|
ph_ctype contains bits that describe how the language uses each
|
|
character. The characteristics include (among other things): the
|
|
character is valid, the character is punctuation, and the character is a
|
|
vowel.
|
|
|
|
The fields ph_size, ph_sep, ph_nlink, ph_nchar, ph_nindex, and ph_str
|
|
define the rules for phonetic encoding. To see how they do so, see the
|
|
phencode routine.
|
|
|
|
The ph_bintab element contains a sorted list of phonetic essences. Words
|
|
in CLAM files and compressed lexicons are "binned" with this essence
|
|
table, allowing rapid access to words through their phonetically encoded
|
|
forms. ph_nbin is the size of the table.
|
|
|
|
ph_phontab is the translation table for phonetic characters. The table
|
|
is indexed by a phonetic character and translates it into a phonetic
|
|
pair. The size of this table is ph_nphon. The fields ph_minprep,
|
|
ph_nprep, and ph_efill record special information about the phonetic
|
|
table.
|
|
|
|
The fields ph_phpar and ph_phspar contain the theta parameters for the
|
|
language. The first of these is for ordinary length words, the second,
|
|
for short words.
|
|
*/
|
|
|
|
/* What used to be the PHON structure */
|
|
|
|
#define NBIN 170 /* presumably the entry count of Engphbintab (formerly ph_nbin) -- TODO confirm */
|
|
#define NPHON 58 /* presumably the pair count of Engphtab (formerly ph_nphon); equals MINALPH + NUMALPH -- TODO confirm */
|
|
#define MINALPH 12 /* NOTE(review): possibly the former ph_minprep -- confirm */
|
|
#define NUMALPH 46 /* NOTE(review): possibly the former ph_nprep -- confirm */
|
|
#define EFILLIND 7 /* NOTE(review): possibly the former ph_efill -- confirm */
|
|
extern UCHAR *Engphbintab[]; /* location of constant bin table */
|
|
extern UCHAR Engphtab[][2]; /* table of phon pair-to-byte mappings */
|
|
extern UCHAR Engctype[]; /* Character types information (tested with the C_* bits) */
|
|
extern UCHAR Engphpar[]; /* Proximity parameters */
|
|
extern UCHAR Engphspar[]; /* Proximity parameters - short words */
|
|
#define PH_SIZE 0x9f /* presumably the former ph_size phonetic-encoding parameter -- TODO confirm */
|
|
#define PH_SEP 0x23 /* presumably the former ph_sep; 0x23 is ASCII '#' -- TODO confirm */
|
|
extern UCHAR Engnlink[]; /* link to next node in tree */
|
|
extern UCHAR Engnchar[]; /* character for this node */
|
|
extern UCHAR Engnindex[]; /* index into string table */
|
|
extern UCHAR Engstr[]; /* the string table */
|
|
|
|
/* Environment structure. Those things which can change from language to
|
|
language are (partly) centralized here. The first item in this table is
|
|
the environment code for the language.
|
|
|
|
The next section of the structure contains pointers to other data used by
|
|
the Spelling Components for this language.
|
|
|
|
The last section contains pointers to special functions for the
|
|
language. */
|
|
|
|
/* Environment globals */
|
|
|
|
extern int Iswacctab[]; /* NOTE(review): defined elsewhere; name suggests an accent table for isword() -- confirm */
|
|
|
|
/* SC globals */
|
|
|
|
extern int Sciswctl; /* The word lists to be used by isword() */
|
|
extern int Scflags; /* The flags being checked */
|
|
extern int Sclen; /* The length being checked */
|
|
extern int Scindex; /* The position being checked */
|
|
extern int Scaccmiss; /* The query is missing an accent */
|
|
extern UCHAR Sclookup[LONGWORD+2]; /* The lookup form being checked; LONGWORD is not defined in this header */
|
|
extern char *Sccurlist; /* The current word list */
|
|
extern int Sccurblk; /* The current block number */
|
|
extern UCHAR Scdecomp[3*MAXWORD]; /* The decompression buffer; MAXWORD is not defined in this header */
|
|
extern UCHAR Scfdecomp[MAXFLAGS]; /* The flag decompression buffer; MAXFLAGS is not defined in this header */
|
|
extern int Sctag; /* The tag for the current word */
|
|
extern UCHAR *Scendptr; /* End of the decompressed data */
|
|
|
|
/* character typing bits for use with the ph_ctype tables. */
|
|
|
|
#define C_VALID 0x01 /* bit 0: valid character in a word */
|
|
#define C_DET_T 0x08 /* bit 3: Detect: character is a token by itself */
|
|
#define C_DET_V 0x20 /* bit 5: Detect: character is vowel */
|
|
#define C_ACMATCH 0x40 /* bit 6: unaccented char matches this char */
|
|
|
|
#define scvalid(c) (C_VALID & Engctype[ctoi(c)]) /* nonzero when the Engctype entry for c has C_VALID set */
|
|
/* Character is encodable */
|
|
#define scvowel(c) (C_DET_V & Engctype[ctoi(c)]) /* nonzero when the Engctype entry for c has C_DET_V set */
|
|
/* Character is a vowel */
|
|
#define isaccent(c) (0x80 & (c)) /* yields 0x80 when bit 7 of c is set, 0 otherwise */
|
|
/* Character has diacritic mark */
|
|
|
|
/* Miscellaneous definitions. */
|
|
|
|
#define U_S(s) ((UCHAR *)(s)) /* Cast s to UCHAR * (pointer to unsigned char) */
|
|
#define scmove(d,s,c) (move((char *)(d), (char *)(s), (c))) /* Move arb data: casts d and s to char * and forwards (d, s, c) to move(), which is not declared in this header */
|
|
|
|
#endif /* SC_ENVIRON */
|