antoine-source/appleworksgs/Spell/Src/PHON.C

/***********************************************************************\

   Filename: phon.c

\***********************************************************************/

#include "environ.h"
#include "string.h"
/*#include "ctype.h"*/

/* This routine converts a string into the two-character representation of 
   the phonetically encoded string.  The input string is matched against the 
   phonetic rules and the translated results are copied to the output 
   string.  If the word is a special then the E_LRSEP (and a fill character) 
   and the second part of the flagged string are copied to the output 
   string.  */ 

#define PH_FOLD   'X'           /* subtracted from a character to get its offset from a node link */
#define PH_START  'Y'           /* start-of-word marker placed before the input word */
#define PH_END    'Z'           /* end-of-word marker placed after the input word */
#define PH_NOFLAG 0x7F          /* Engnindex value meaning no rule ends at this node */

VOID
phencode(instr, outptr0)
char   *instr;                 /* The word to be encoded */
char   *outptr0;               /* The encoded string */
{
 int node;      /* a node index */
 int cc;        /* the current character */
 char *substr; /* portion of the original input word */
                        /* also pointer into string array */
 char *word;   /* input word with start/stop delimiters */
 char *outptr = outptr0;
int     matched;        /* number of characters matched */
int     index;          /* phonetic character or index into str tab */
char   inbuf[LONGWORD + 2];    /* Buffer for copy of input word */

   /* Add start of word (PH_START) and end of word (PH_END) indicators 
      to input word.  If input characters are not valid for the 
      language, make them lower case and remove accent marks if 
      necessary.  Node is used as a temp.  */ 

   word = inbuf;
   *word++ = PH_START;
   while (*instr && *instr != E_LRSEP)
   {
       cc = ctoi(*instr++);
       if (!scvalid(cc))
       {
           node = tolower(cc);
           cc = scvalid(node) ? node : cc;
       }
       *word++ = cc;
   }
   *word++ = PH_END;
   *word = 0;

   /* Begin search for each character of word */

   word = inbuf;
   while (*word && *word != PH_END)
   {
       matched = 0;
       substr = word;
       node = 0;
       while (cc = ctoi(*substr++))
       {
           /* Have a character, so go to the node for the 
              character.  The first line checks for a link to 
              the next character.  The next prevents a 
              reference outside the node list.  The third checks
              that the node belongs to the state.  If the
              search is successful, record the number of
              characters matched and the current node. */

           cc = (cc - PH_FOLD) & 0xFF;
           if ((node = Engnlink[node]) == 0
             || (node += cc) >= PH_SIZE
             || ctoi(Engnchar[node]) != cc)
               break;
           else if (Engnindex[node] != PH_NOFLAG)
           {
               matched = substr - word;
               index = node;
           }
       }

       /* Whether the loop above stopped on a failed link or on the
          end of the string, the number of characters matched and the
          node index were already recorded inside it.  */

       /* Convert the matched portion of the string:

          If no character was matched, the default output is the
          input character followed by E_FILL.  If the character only
          marked the start of the string (PH_START), ignore it.  */

       if (!matched)
       {
           if (*word != PH_START)
           {
               *outptr++ = *word;
               *outptr++ = E_FILL;
           }
           ++word;
           continue;
       }

       /* Store actual index */

       index = Engnindex[index];

       /* If the index is less than 10, it represents a phonetic 
          character.  Ignore the start and end characters.  If only 
          one character was matched, converted string is input 
          character, phonetic character (converted index).  If not, 
          two characters were matched (a repeat) which becomes 
          input character, phonetic character, E_REPEAT, E_FILL. */