mirror of
https://github.com/antoinevignau/source.git
synced 2025-01-08 13:29:45 +00:00
1 line
16 KiB
C++
1 line
16 KiB
C++
|
/***********************************************************************\
Filename: phon.c
\***********************************************************************/
#include "environ.h"
#include "string.h"
/*#include "ctype.h"*/
/* This routine converts a string into the two-character representation of
the phonetically encoded string. The input string is matched against the
phonetic rules and the translated results are copied to the output
string. If the word is a special then the E_LRSEP (and a fill character)
and the second part of the flagged string are copied to the output
string. */
/* Marker characters and sentinel value used by the phonetic rule search. */
#define PH_FOLD 'X' /* base subtracted from a character to get its table offset */
#define PH_START 'Y' /* start-of-word marker placed before the input word */
#define PH_END 'Z' /* end-of-word marker placed after the input word */
#define PH_NOFLAG 0x7F /* Engnindex value meaning no rule ends at this node */
VOID
phencode(instr, outptr0)
char *instr; /* The word to be encoded */
char *outptr0; /* The encoded string */
{
int node; /* a node index */
int cc; /* the current character */
char *substr; /* portion of the original input word */
/* also pointer into string array */
char *word; /* input word with start/stop delimiters */
char *outptr = outptr0;
int matched; /* number of characters matched */
int index; /* phonetic character or index into str tab */
char inbuf[LONGWORD + 2]; /* Buffer for copy of input word */
/* Add start of word (PH_START) and end of word (PH_END) indicators
to input word. If input characters are not valid for the
language, make them lower case and remove accent marks if
necessary. Node is used as a temp. */
word = inbuf;
*word++ = PH_START;
while (*instr && *instr != E_LRSEP)
{
cc = ctoi(*instr++);
if (!scvalid(cc))
{
node = tolower(cc);
cc = scvalid(node) ? node : cc;
}
*word++ = cc;
}
*word++ = PH_END;
*word = 0;
/* Begin search for each character of word */
word = inbuf;
while (*word && *word != PH_END)
{
matched = 0;
substr = word;
node = 0;
while (cc = ctoi(*substr++))
{
/* Have a character, so go to the node for the
character. The first line checks for a link to
the next character. The next prevents a
reference outside the node list. The third checks
that the node belongs to the state. If the
search is successful, record the number of
characters matched and the current node. */
cc = (cc - PH_FOLD) & 0xFF;
if ((node = Engnlink[node]) == 0
|| (node += cc) >= PH_SIZE
|| ctoi(Engnchar[node]) != cc)
break;
else if (Engnindex[node] != PH_NOFLAG)
{
matched = substr - word;
index = node;
}
}
/* If we are here because we have reached the end of the
string, record the number of characters matched and the
index. */
/* Convert the matched portion of the string:
If no character was matched, the default is character
into character E_FILL. If the character only marked the
start of the string (PH_START), ignore it. */
if (!matched)
{
if (*word != PH_START)
{
*outptr++ = *word;
*outptr++ = E_FILL;
}
++word;
continue;
}
/* Store actual index */
index = Engnindex[index];
/* If the index is less than 10, it represents a phonetic
character. Ignore the start and end characters. If only
one character was matched, converted string is input
character, phonetic character (converted index). If not,
two characters were matched (a repeat) which becomes
input character, phonetic character, E_REPEAT, E_FILL. */
|