antoine-source/appleworksgs/Spell/Src/PHON.C

1 line
16 KiB
C++
Raw Normal View History

2023-03-04 02:45:20 +00:00
/***********************************************************************\ Filename: phon.c \***********************************************************************/ #include "environ.h" #include "string.h" /*#include "ctype.h"*/ /* This routine converts a string into the two-character representation of the phonetically encoded string. The input string is matched against the phonetic rules and the translated results are copied to the output string. If the word is a special then the E_LRSEP (and a fill character) and the second part of the flagged string are copied to the output string. */ #define PH_FOLD 'X' /* conversion from character to index */ #define PH_START 'Y' /* start of word character */ #define PH_END 'Z' /* end of word character */ #define PH_NOFLAG 0x7F /* no rule end here */ VOID phencode(instr, outptr0) char *instr; /* The word to be encoded */ char *outptr0; /* The encoded string */ { int node; /* a node index */ int cc; /* the current character */ char *substr; /* portion of the original input word */ /* also pointer into string array */ char *word; /* input word with start/stop delimiters */ char *outptr = outptr0; int matched; /* number of characters matched */ int index; /* phonetic character or index into str tab */ char inbuf[LONGWORD + 2]; /* Buffer for copy of input word */ /* Add start of word (PH_START) and end of word (PH_END) indicators to input word. If input characters are not valid for the language, make them lower case and remove accent marks if necessary. Node is used as a temp. */ word = inbuf; *word++ = PH_START; while (*instr && *instr != E_LRSEP) { cc = ctoi(*instr++); if (!scvalid(cc)) { node = tolower(cc); cc = scvalid(node) ? node : cc; } *word++ = cc; } *word++ = PH_END; *word = 0; /* Begin search for each character of word */ word = inbuf; while (*word && *word != PH_END) { matched = 0; substr = word; node = 0; while (cc = ctoi(*substr++)) { /* Have a character, so go to the node for the character. The first line checks for a link to the next character. The next prevents a reference outside the node list. The third checks that the node belongs to the state. If the search is successful, record the number of characters matched and the current node. */ cc = (cc - PH_FOLD) & 0xFF; if ((node = Engnlink[node]) == 0 || (node += cc) >= PH_SIZE || ctoi(Engnchar[node]) != cc) break; else if (Engnindex[node] != PH_NOFLAG) { matched = substr - word; index = node; } } /* If we are here because we have reached the end of the string, record the number of characters matched and the index. */ /* Convert the matched portion of the string: If no character was matched, the default is character into character E_FILL. If the character only marked the start of the string (PH_START), ignore it. */ if (!matched) { if (*word != PH_START) { *outptr++ = *word; *outptr++ = E_FILL; } ++word; continue; } /* Store actual index */ index = Engnindex[index]; /* If the index is less than 10, it represents a phonetic character. Ignore the start and end characters. If only one character was matched, converted string is input character, phonetic character (converted index). If not, two characters were matched (a repeat) which becomes input character, phonetic character, E_REPEAT, E_FILL. */