antoine-source/appleworksgs/Spell/Src/UNFLECT.C
2023-03-04 03:45:20 +01:00

1 line
23 KiB
C
Executable File

/***********************************************************************\
Filename: unflect.c
\***********************************************************************/
#include "thesmisc.h"
#include <string.h>
/*
 * Bit mask of the lower-case vowels a, e, i, o, u and y: bit n is set
 * when the letter 'a' + n is a vowel.  The mask needs 25 bits, so it is
 * spelled as a long constant.
 */
#define VMASK 0x1104111L
/*
 * Non-zero (1) when c is one of a, e, i, o, u, y; 0 for every other
 * character.  The shift is done in long so that the 'u' and 'y' bits are
 * not lost where int is 16 bits wide, and characters outside a-z are
 * rejected before shifting (a negative or over-wide shift count is
 * undefined).  The argument is evaluated more than once, so it must not
 * have side effects.
 */
#define isvowel(c) \
    ((c) >= 'a' && (c) <= 'z' && ((1L << ((c) - 'a')) & VMASK) != 0)
/* Ending-table entry selected for the word currently being unflected. */
static END *Unfstgptr;

/* Forward declarations of the file-local routines (old-style, no prototypes). */
static END *cutoff1();
static int unfword();
static int unfphrase();
static int uplural();
static int udo_ly();
static int udo_ness();
static int unormend();
static int udo_ment();
static int udo_able();
static int chkmultunf();
static int multunftry();
static int removunf();
/*
 * Table of the endings that can be stripped.  As used in this file the
 * five initializers of an entry are:
 *   en_end   - the ending text (NULL marks the end of the table)
 *   en_len   - the number of characters in the ending
 *   en_class - rule class; the switch in unfword() uses it to pick the
 *              routine that rebuilds the root.  Reuse a class, or add a
 *              case there, when adding endings.
 *   en_pos   - TH_ part(s) of speech of the root form
 *   en_rel   - relationship between the root and its inflection
 *
 * The ordering of the entries is crucial:
 *   - cutoff1() returns the first entry whose ending matches, so earlier
 *     entries take precedence over later ones.
 *   - unfstage() returns the first entry with a given en_rel, except that
 *     for SI_PL it returns the entry following the first match.
 *   - unfword() moves a pointer obtained from unfstage() by +2 or -2
 *     entries to switch between the "s" entries, so the four consecutive
 *     "s" entries must keep their relative positions.
 */
static END Stage[] =
{
{"ness", 4, 2, TH_ADJ|TH_ADJSP,AJ_NN},
{"ment", 4, 4, TH_VERB|TH_VERBSP,VB_NN},
{"able", 4, 5, TH_VERB|TH_VERBSP,VB_AJ},
{"ible", 4, 5, TH_VERB|TH_VERBSP,VB_AJ},
{"ments", 5, 4, TH_VERB|TH_VERBSP,VB_NN},
/* the four "s" entries: see the note on ordering above */
{"s", 1, 0, TH_NOUN|TH_VERB|TH_VERBSP,SI_PL},
{"s", 1, 0, TH_NOUN,SI_PL},
{"s", 1, 0, TH_SPNOUN,SP_PL},
{"s", 1, 0, TH_VERB|TH_VERBSP,TP_SI},
{"a", 1, 7, TH_NOUN,SI_PL},
{"men", 3, 6, TH_NOUN,SI_PL},
{"i", 1, 8, TH_NOUN,SI_PL},
{"x", 1, 9, TH_NOUN,SI_PL},
{"ly", 2, 1, TH_ADJ|TH_ADJSP,AJ_AV},
{"ing", 3, 3, TH_VERB|TH_VERBSP,PR_PC},
{"ed", 2, 3, TH_VERB,PR_PX},
{"ed", 2, 3, TH_VERBSP,SV_PX},
{"ed", 2, 3, TH_VERB,PR_PS},
{"ed", 2, 3, TH_VERB,PR_PP},
{"er", 2, 3, TH_ADJ,RE_CM},
{"est", 3, 3, TH_ADJ,RE_SU},
{"are", 3, 10, TH_VERB,TP_SP},
{"am", 2, 10, TH_VERB,TP_FP},
/* terminator: en_end == NULL ends the scans in cutoff1() and unfstage() */
{NULL, 0, 0, 0,DN_CR}
};
/*
 * Entry point: unflect a word or a phrase.
 *
 * Input that contains a blank is handed to unfphrase() together with a
 * pointer to its first blank; anything else goes to unfword().  The
 * value returned by the chosen routine is returned unchanged.
 */
unflect(str, multunf, multstage)
char *str;          /* string to unflect */
char *multunf[];    /* pointers to unflected strings */
END *multstage[];   /* unflection stages */
{
    char *blank;

    blank = strchr(str, ' ');
    if (blank != NULL)
        return (unfphrase(str, blank, multunf, multstage));
    return (unfword(str, multunf, multstage));
}
/*
 * Unflect a single word.
 *
 * The exception list is consulted first (unfexc with UNF_IR).  When the
 * word is not listed, the first matching ending in Stage is cut off and
 * the rule routine selected by that entry's en_class rebuilds candidate
 * roots in place in str.
 *
 * On success the candidate roots are stored in str as consecutive
 * NUL-terminated strings, multunf[] points at each of them and
 * multstage[] holds the Stage entry that applies to each.  Both lists
 * are NULL-terminated on every successful return, because unfphrase()
 * walks multunf[] as well as multstage[] up to the terminator and
 * removunf() relies on the terminator in multunf[].
 *
 * NOTE(review): str is assumed to be large enough for every candidate
 * that is appended to it; the buffer size is not visible here.
 *
 * Returns TRUE when at least one root was produced, FALSE otherwise.
 */
static unfword(str, multunf, multstage)
char *str;          /* string to unflect */
char *multunf[];    /* pointers to unflected strings */
END *multstage[];   /* unflection stages */
{
    END *cutoff1();
    END *unfstage();
    char *endptr;
    int infindex;
    char *tmpptr;
    int stageind;
    int tmpind;
    char dummy[MAXWORD];
    char tmpstr[MAXWORD];

    strecpy(tmpstr, str);           /* keep the original spelling */
    stageind = 0;
    endptr = str;
    /* see if the word is an exception */
    /* if a singular to plural is indicated, it is for a noun */
    if ((infindex = unfexc(str, dummy, UNF_IR)) != ERROR)
    {
        if (infindex == 0)
            return (FALSE);
        strecpy(str, dummy);
        if (infindex == PR_PS + 1)
        {
            /* the same form is also listed under PR_PP: report PR_PX */
            if (unfexc(tmpstr,tmpstr,PR_PP + 1) > 0)
                infindex = PR_PX + 1;
        }
        if ((Unfstgptr = unfstage(infindex)) == NULL)
            return (FALSE);
        endptr = strchr(str, 0);
        stageind = chkmultunf(str, multstage, multunf, stageind);
        tmpptr = strchr(tmpstr, 0) - 1;
        /* an exceptional plural that itself ends in 's': also run the */
        /* regular "s" rules on the original spelling and, when they   */
        /* succeed, append that root after the exception's root        */
        if ((infindex == SI_PL + 1 || infindex == SP_PL + 1)
            && *tmpptr == 's')
        {
            *tmpptr = '\0';
            /* step to another of the adjacent "s" entries in Stage */
            if (infindex == SI_PL + 1)
                Unfstgptr += 2;
            else
                Unfstgptr -= 2;
            if (uplural(tmpstr, tmpptr))
            {
                endptr++;
                strecpy(endptr, tmpstr);
                multunf[stageind] = endptr;
                multstage[stageind++] = Unfstgptr;
            }
        }
        multunf[stageind] = NULL;
        multstage[stageind] = NULL;
        return (TRUE);
    }
    if ((Unfstgptr = cutoff1(endptr = strchr(str, 0))) == NULL)
        return (FALSE);
    /* cutoff1() compares the bytes in front of endptr without knowing */
    /* where the word starts, so a short word can "match" a longer     */
    /* ending through whatever precedes it in memory.  Never cut back  */
    /* past the start of the word.                                     */
    if ((int) (endptr - str) < (int) Unfstgptr->en_len)
        return (FALSE);
    endptr -= Unfstgptr->en_len;
    *endptr = '\0';
    switch (Unfstgptr->en_class)
    {
    case 0: /* s */
        if (!uplural(str, endptr))
            return (FALSE);
        /* a plural form can be third person singular of a verb */
        /* if the noun plural is in the exceptions, it can only */
        /* be a verb. */
        if ((infindex = unfexc(str, dummy, SI_PL)) > 0)
            Unfstgptr = unfstage(TP_SI + 1);
        break;
    case 1: /* ly */
        if (!udo_ly(str, endptr))
            return (FALSE);
        break;
    case 2: /* ness */
        if (!udo_ness(str, endptr))
            return (FALSE);
        break;
    case 3: /* ing, ed, er, est */
        if (!unormend(str, endptr))
            return (FALSE);
        break;
    case 4: /* ment, ments */
        if (*(strchr((char *) Unfstgptr->en_end, 0) - 1) != 's')
        {
            if (!udo_ment(str,endptr))
                return (FALSE);
            break;
        }
        /* check for both pluralizing relation and verb -> noun */
        strecpy(dummy, str);                /* dummy  = root            */
        endptr = strecpy(str, tmpstr) - 1;  /* str    = original word   */
        *endptr = '\0';                     /*          minus final 's' */
        strecpy(tmpstr, dummy);             /* tmpstr = root            */
        if (unftry(str))
        {
            /* "<root>ment" is a word: record it as the singular */
            multstage[stageind] = unfstage(SI_PL + 1);
            multunf[stageind++] = str;
            endptr = strchr(endptr, 0) + 1;
        }
        else
            endptr = str;
        tmpptr = strchr(tmpstr, 0);
        if (udo_ment(tmpstr, tmpptr))
        {
            /* the root itself is a word: append it as a second entry */
            strecpy(endptr, tmpstr);
            multunf[stageind] = endptr;
            multstage[stageind++] = Unfstgptr;
            endptr = strchr(endptr, 0);
        }
        *endptr = '\0';     /* leaves str empty when nothing was found */
        multunf[stageind] = NULL;
        multstage[stageind] = NULL;
        return (*str != '\0');
    case 5: /* able, ible */
        if (!udo_able(str, endptr))
            return (FALSE);
        break;
    case 6:
        strecpy(endptr, "man"); /* <word>men -> <word>man */
        if (!unftry(str))
            return (FALSE);
        break;
    case 7:
        strecpy(endptr, "um"); /* stadia -> stadium */
        if (!unftry(str))
            return (FALSE);
        break;
    case 8:
        strecpy(endptr, "us"); /* syllabi -> syllabus */
        if (!unftry(str))
            return (FALSE);
        break;
    case 9:
        if (endptr[-1] != 'u' || !unftry(str))
            return (FALSE); /* plateaux -> plateau */
        break;
    case 10:
        if (*str != '\0') /* am/are -> be */
            return (FALSE);
        strecpy(str, "be");
        break;
    default:
        return (FALSE);
    }
    /* split str at the separators left by multunftry() and fill the lists */
    tmpind = chkmultunf(str, multstage, multunf, stageind);
    multunf[tmpind] = NULL;
    multstage[tmpind] = NULL;
    /* remove unflections that are derived from applying the */
    /* rules if the root form can be derived from the exceptions */
    if (!stageind)
        while (stageind < tmpind)
        {
            infindex = unfexc(multunf[stageind], tmpstr, Unfstgptr->en_rel);
            if (infindex == ERROR && Unfstgptr->en_rel == PR_PX)
                infindex = unfexc(multunf[stageind], tmpstr, PR_PS);
            if (infindex > 0)
            {
                /* removunf() moves the later entries down one slot, */
                /* so the same index is examined again               */
                tmpind--;
                removunf(&multunf[stageind], &multstage[stageind]);
                continue;
            }
            stageind++;
        }
    return (multunf[0] != NULL);
}
/*
 * Return the first Stage entry whose ending is the tail of the string
 * that ends at endptr, or NULL when no ending matches.
 *
 * For each entry the en_len characters in front of endptr are compared,
 * up to the terminating NUL, with the entry's ending text.  The start of
 * the word is not known here, so the caller must make sure those bytes
 * may be read.
 */
static END *cutoff1(endptr)
char *endptr;
{
    END *entry;

    for (entry = Stage; entry->en_end != NULL; entry++)
    {
        if (strcmp(endptr - entry->en_len, (char *) entry->en_end) == 0)
            return (entry);
    }
    return (NULL);
}
/*
 * Rebuild the singular (or base) form of a word whose final "s" has
 * already been cut off.
 *
 * str is the remaining text and endptr points at its terminating NUL.
 * The text is edited in place: each candidate spelling is written over
 * the tail of str and passed to unftry(), and the first candidate that
 * unftry() accepts is left in str.  The characters endptr[-1],
 * endptr[-2] and endptr[-3] select the rule, so the statement order in
 * every case matters.
 *
 * Returns TRUE when a candidate was accepted, otherwise the (false)
 * result of the last unftry() call or FALSE.
 */
static uplural(str, endptr)
char *str;
char *endptr;
{
/* text does not end in 'e': only the plain root can be right */
if (endptr[-1] != 'e')
{
if (endptr[-1] == 's')
{ /* kiss -> failure; the final 's' is put back first */
*endptr++ = 's';
*endptr = '\0';
return (FALSE);
}
return (unftry(str));
}
/* text ends in 'e': the letter before the 'e' selects the rule */
switch (endptr[-2])
{
case 'o':
endptr[-1] = '\0';
if (unftry(str)) /* does -> do */
return (TRUE);
endptr[-1] = 'e';
break;
case 'i':
endptr[-2] = 'y'; /* flies -> fly */
endptr[-1] = '\0';
if (unftry(str))
return (TRUE);
endptr[-2] = 'i'; /* brownies -> brownie */
endptr[-1] = 'e';
return (unftry(str));
case 'h':
if (endptr[-3] != 't')
{ /* not "-the": try without the 'e' first, e.g. dishes -> dish */
endptr[-1] = '\0';
if (unftry(str))
return (TRUE);
endptr[-1] = 'e';
}
return (unftry(str)); /* aches -> ache */
case 'x':
endptr[-1] = '\0'; /* boxes -> box */
return (unftry(str));
case 's': /* classes -> class */
if (endptr[-3] == endptr[-2])
{
endptr[-1] = '\0'; /* busses -> buss */
if (unftry(str))
return (TRUE);
endptr[-2] = '\0'; /* then with a single 's': busses -> bus */
return (unftry(str));
}
*endptr = '\0';
if (unftry(str)) /* noses -> nose */
return (TRUE);
/* replace the 'e' by "is"; this writes one character past endptr */
endptr[-1] = 'i';
*endptr = 's';
endptr[1] = '\0';
if (unftry(str)) /* theses -> thesis */
return (TRUE);
endptr[-1] = '\0';
return (unftry(str)); /* proboscises -> proboscis */
case 'z':
if (endptr[-3] == endptr[-2])
{
endptr -= 2;
*endptr = '\0'; /* whizzes -> whizz/whiz */
return (multunftry(str, endptr, "z", ""));
}
endptr[-1] = '\0';
if (unftry(str))
return (TRUE);
endptr[-1] = 'e';
break;
case 'v':
if (unftry(str)) /* shelves -> shelve */
return (TRUE);
endptr[-2] = 'f'; /* halves -> half */
endptr[-1] = '\0';
if (unftry(str))
return (TRUE);
endptr[-1] = 'e'; /* knives -> knife */
break;
case 'c':
if (unftry(str)) /* vices -> vice */
return (TRUE);
endptr[-2] = 'x';
endptr[-1] = '\0';
if (unftry(str)) /* matrices -> matrix */
return (TRUE);
endptr[-3] = 'e';
return (unftry(str)); /* vertices -> vertex */
default:
break;
}
/* last resort: the text up to endptr, with its 'e' in place */
*endptr = '\0';
return (unftry(str));
}
/*
 * Root of a word whose "ness" ending has been cut off.  endptr points
 * at the terminating NUL of str.  A stem ending in 'i' is first tried
 * with 'y' in its place; the unchanged stem is tried otherwise, or when
 * the 'y' form is rejected.  Returns the result of the deciding
 * unftry() call.
 */
static udo_ness(str, endptr)
char *str;
char *endptr;
{
    char *last;

    last = endptr - 1;
    if (*last != 'i')
        return (unftry(str));

    *last = 'y';                /* readiness -> ready */
    if (unftry(str))
        return (TRUE);

    *last = 'i';                /* put the 'i' back */
    return (unftry(str));
}
/*
 * Root of a word whose "ly" ending has been cut off.  endptr points at
 * the terminating NUL of str; the stem is edited in place and the
 * candidates are checked with unftry().
 *
 *   stem ends in 'i'    the 'i' becomes 'y' (no other candidate)
 *   stem ends in "cal"  both "<...c>al" and "<...c>" are offered through
 *                       multunftry()
 *   stem ends in 'l'    the stem with a second 'l', then the plain stem
 *   stem ends in 'b'    the stem plus "le", then the plain stem
 *   anything else       the plain stem
 */
static udo_ly(str, endptr)
char *str;
char *endptr;
{
    char last;

    last = endptr[-1];
    if (last == 'i')
    {
        endptr[-1] = 'y';               /* readily -> ready */
        return (unftry(str));
    }
    if (last == 'l')
    {
        /* accept more than one possible unflection */
        /* e.g. analytically -> analytical/analytic */
        if (endptr[-2] == 'a' && endptr[-3] == 'c')
        {
            char *cut;

            cut = endptr - 2;
            *cut = '\0';
            return (multunftry(str, cut, "al", ""));
        }
        endptr[0] = 'l';
        endptr[1] = '\0';
        if (unftry(str))                /* drolly -> droll */
            return (TRUE);
    }
    else if (last == 'b')
    {
        strecpy(endptr, "le");          /* crumbly -> crumble */
        if (unftry(str))
            return (TRUE);
    }
    *endptr = '\0';
    return (unftry(str));               /* <word>ly -> <word> */
}
/*
 * Root of a word whose "ing", "ed", "er" or "est" ending has been cut
 * off.  endptr points at the terminating NUL of the stem in str and
 * Unfstgptr is the Stage entry of the ending that was removed.
 *
 * For "er" the stem plus "est" must be accepted by unftry(), and for
 * "est" the stem plus "er", before any root is looked for.  The last
 * letter of the stem then selects the spelling rule.  Candidates are
 * written over the tail of str; where two spellings are plausible both
 * are offered through multunftry().
 *
 * Returns TRUE when a root was accepted, otherwise a false value.
 */
static unormend(str, endptr)
char *str;
char *endptr;
{
    char end1[MAXWORD];
    char end2[MAXWORD];
    char saved[MAXWORD];    /* copy of the stem, see case 'y' */

    if (Unfstgptr->en_rel == RE_CM)
    {
        strecpy(endptr, "est");
        if (!unftry(str))
            return (FALSE);
    }
    else if (Unfstgptr->en_rel == RE_SU)
    {
        strecpy(endptr, "er");
        if (!unftry(str))
            return (FALSE);
    }
    *endptr = '\0';
    switch (endptr[-1])
    {
    case 'i':
        endptr[-1] = 'y';               /* angriest -> angry */
        if (unftry(str))
            return (TRUE);
        endptr[-1] = 'i';
        *endptr++ = 'e';
        *endptr = '\0';                 /* tied -> tie */
        return (unftry(str));
    case 'e':
        if (endptr[-2] == 'e')          /* seeing -> see */
            return (unftry(str));
        if (!(Unfstgptr->en_pos & TH_ADJ))
            if (unftry(str))            /* dieing -> die */
                return (TRUE);
        *endptr++ = 'e';                /* bared -> bare */
        *endptr = '\0';
        return (unftry(str));
    case 'u':
        *endptr++ = 'e';
        *endptr = '\0';
        if (unftry(str))                /* arguing -> argue */
            return (TRUE);
        endptr[-1] = '\0';
        return (unftry(str));
    case 'y':
        if (Unfstgptr->en_rel == PR_PC)
        {
            /* multunftry() leaves str empty when it rejects both */
            /* candidates, so keep a copy of the stem for the     */
            /* fallback below.  NOTE(review): assumes the stem    */
            /* fits in MAXWORD, as the other buffers here do.     */
            strecpy(saved, str);
            strecpy(end1, (char *)"ie");
            end2[0] = 'y';
            end2[1] = '\0';
            *--endptr = '\0';           /* drop the 'y' */
            if (multunftry(str, endptr, end1, end2))
                return (TRUE);
            strecpy(str, saved);        /* restore "<stem>y" ...      */
            endptr++;                   /* ... and point at its NUL   */
        }
        end2[0] = 'e';
        end1[0] = end2[1] = '\0';
        return (multunftry(str, endptr, end1, end2));
    case 'o':
        if (Unfstgptr->en_pos & (TH_VERB | TH_VERBSP))
        {
            end2[0] = 'e';
            end1[0] = end2[1] = '\0';
            return (multunftry(str, endptr, end1, end2));
        }
        /* not a verb ending: fall through and try the stem as it is */
    case 'a':
        return (unftry(str));
    case 'h':
        if (endptr[-2] == 't'
            && (Unfstgptr->en_pos & (TH_VERB | TH_VERBSP)))
        {
            *endptr++ = 'e';
            *endptr = '\0';
            if (unftry(str))            /* bathing -> bathe */
                return (TRUE);
            endptr--;
            *endptr = '\0';
        }
        if (unftry(str))                /* highest -> high */
            return (TRUE);
        *endptr++ = 'e';                /* lithest -> lithe */
        *endptr = '\0';
        return (unftry(str));
    case 'k':
        if (endptr[-2] == 'c')
        {
            if (unftry(str))            /* packing -> pack */
                return (TRUE);
            endptr[-1] = '\0';
            return (unftry(str));       /* panicking -> panic */
        }
        break;
    case 'b':
    case 'd':
    case 'f':
    case 'g':
    case 'l':
    case 'm':
    case 'n':
    case 'p':
    case 't':
        if (isvowel(endptr[-2]) && strlen((char *)str) < 4)
        {
            /* short stem, vowel + consonant: when the form with */
            /* the doubled consonant plus "ing" is a word, only  */
            /* the stem plus 'e' is offered as the root          */
            *endptr = endptr[-1];
            strecpy(endptr + 1, "ing");
            if (unftry(str))
            {
                *endptr = 'e';
                endptr[1] = '\0';
                return (unftry(str));   /* bated -> bate */
            }
            *endptr = '\0';
        }
        break;
    default:
        break;
    }
    /* accept more than one possible unflection */
    /* e.g. bussing -> buss/bus, lapping -> lapp/lap */
    if (endptr[-1] == endptr[-2])
    {
        end1[0] = endptr[-1];
        end2[0] = end1[1] = '\0';
        *--endptr = '\0';
        return (multunftry(str, endptr, end1, end2));
    }
    end1[0] = 'e';
    end1[1] = end2[0] = '\0';           /* casting -> caste/cast */
    return (multunftry(str, endptr, end1, end2));
}
/*
 * Root of a word whose "ment" ending has been cut off.  endptr points
 * at the terminating NUL of str.  A stem ending in 'a' is rejected
 * outright.  A stem ending in "dg" gets an 'e' appended before it is
 * checked; every other stem is checked as it stands.  Returns FALSE or
 * the result of unftry().
 */
static udo_ment(str, endptr)
char *str;
char *endptr;
{
    if (endptr[-1] == 'a')
        return (FALSE);

    if (endptr[-1] == 'g' && endptr[-2] == 'd')
    {
        endptr[0] = 'e';        /* acknowledgment -> -ledge */
        endptr[1] = '\0';
    }
    return(unftry(str));
}
/*
 * Root of a word whose "able" or "ible" ending has been cut off.
 *
 * str holds the stem and endptr points at its terminating NUL.  The
 * stem is edited in place; each candidate is checked with unftry() and
 * the first accepted one is left in str.  The rules are tried in the
 * order written and some of them overwrite the characters that later
 * rules test, so the order matters.
 *
 * Returns TRUE when a candidate was accepted, otherwise a false value.
 */
static udo_able(str, endptr)
char *str;
char *endptr;
{
/* stem ends in a doubled letter */
if (endptr[-1] == endptr[-2])
{
if (unftry(str))
return(TRUE);
/* doubled consonant from the list: try the stem with a single one */
if (strchr("bdfglmnprt", (int) endptr[-1]))
{
endptr[-1] = '\0';
return (unftry(str));
}
/* "ss": try replacing it by a single 't' */
if (endptr[-2] == 's')
{
endptr[-2] = 't';
endptr[-1] = '\0';
if (unftry(str)) /* permissible -> permit */
return (TRUE);
}
return (FALSE);
}
/* stem ends in 'i': try "<stem>ate", then the stem with 'y' for the 'i' */
if (endptr[-1] == 'i')
{
strecpy(endptr, "ate");
if (unftry(str)) /* satiable -> satiate */
return (TRUE);
endptr[-1] = 'y';
*endptr = '\0';
return (unftry(str));
}
/* stem ends in 's': for "ns" try "nd" first, then the stem plus 'e' */
if (endptr[-1] == 's')
{
if (endptr[-2] == 'n')
{ /* defensible -> defend */
endptr[-1] = 'd';
*endptr = '\0';
if (unftry(str))
return (TRUE);
endptr[-1] = 's';
}
*endptr = 'e';
endptr[1] = '\0';
return (unftry(str));
}
/* stem ends in c, g, l, n or r: try "<stem>ate", then the plain stem */
if (strchr("cglnr", (int) endptr[-1]))
{
strecpy(endptr, "ate"); /* venerable -> venerate */
if (unftry(str))
return (TRUE);
*endptr = '\0';
if (unftry(str))
return (TRUE);
}
/* general case: stems of three or more characters are offered both */
/* with and without a final 'e'; shorter stems only as they stand   */
if (endptr - str >= 3)
{
return (multunftry(str, endptr, "e", ""));
}
else
return (unftry(str));
}
END *unfstage(index)
int index;
{
int i;
/* the index to search is root->inflection */
/* so subtract 1 */
--index;
for (i = 0; Stage[i].en_end != NULL; i++)
if (Stage[i].en_rel == index)
return (index == SI_PL ? &Stage[i + 1] : &Stage[i]);
return NULL;
}
/*
 * Check the string for multiple unflections and enter them in the lists.
 *
 * str holds one root, or several roots separated by UNFSEP.  The first
 * root is stored at index ind with Unfstgptr as its stage.  Every
 * separator is overwritten with a NUL, and each following root that
 * differs from the first one is stored in the next slot with the same
 * stage as the slot before it.
 *
 * Returns the index of the first unused slot.  For an empty str nothing
 * is stored and ind is returned unchanged, so entries that are already
 * in the lists are not lost.  The lists are not NULL-terminated here;
 * the caller does that.
 */
static chkmultunf(str, stage, multunf, ind)
char *str;
END *stage[];
char *multunf[];
int ind;
{
    char *scan;

    if (*str == '\0')
        return (ind);
    stage[ind] = Unfstgptr;
    multunf[ind++] = str;
    /* if a multiple unflection is indicated, duplicate the */
    /* stageptr for the unflected forms */
    scan = str;
    while (*scan != '\0')
    {
        if (*scan++ == UNFSEP)
        {
            scan[-1] = '\0';            /* cut at the separator */
            if (strcmp(str, scan) != 0)
            {
                stage[ind] = stage[ind - 1];
                multunf[ind++] = scan;
            }
        }
    }
    return (ind);
}
/*
 * Try two alternative spellings of a root and keep every one that
 * unftry() accepts.
 *
 * The first candidate is str followed by end1.  The second is str with
 * end2 copied to endptr, which points into str.  On return str holds
 * the accepted candidates, joined by UNFSEP when both were accepted, or
 * the empty string when neither was.
 *
 * NOTE(review): the local buffer is MAXWORD bytes and may have to hold
 * both candidates plus the separator - confirm MAXWORD allows for that.
 *
 * Returns non-zero when at least one candidate was accepted.
 */
static multunftry(str, endptr, end1, end2)
char *str;
char *endptr;
char *end1;
char *end2;
{
    char found[MAXWORD];
    char *tail;

    /* first candidate: <str><end1>, built in the local buffer */
    tail = strecpy(found, str);
    tail = strecpy(tail, end1);
    if (!unftry(found))
    {
        tail = found;
        *tail = '\0';
    }

    /* second candidate: <str><end2>, built in place */
    strecpy(endptr, end2);
    if (unftry(str))
    {
        if (tail != found)
            *tail++ = UNFSEP;
        strecpy(tail, str);
    }

    strecpy(str, found);
    return (str[0] != '\0');
}
/*
 * Unflect a phrase.  If the phrase is an exception, return the
 * unflected phrase.  Otherwise, attempt unflection of the first word as
 * a verb and of the last word as a noun.
 *
 * str is the phrase and endptr points at its first blank.  The results
 * are written back into str as consecutive NUL-terminated strings;
 * multunf[] and multstage[] receive one entry per result.
 *
 * Returns TRUE when at least one unflected phrase was produced.
 */
static unfphrase(str, endptr, multunf, multstage)
char *str;
char *endptr;
char *multunf[];
END *multstage[];
{
char *tmpptr;
int infindex;
int stageind;
int retval;
char rtstr[2 * MAXWORD]; /* last word of the phrase */
char ltstr[MAXWORD]; /* the phrase up to, and including, its last blank */
char tmpstr[MAXWORD]; /* exception result, later the first word */
char vbstr[MAXWORD]; /* the phrase from its first blank onwards */
retval = FALSE;
stageind = 0;
/* the whole phrase is listed as an exception: a single result */
if ((infindex = unfexc(str, tmpstr, UNF_IR)) != ERROR)
{
if (infindex == 0)
return (FALSE);
if ((Unfstgptr = unfstage(infindex)) == NULL)
return (FALSE);
strecpy(str, tmpstr);
multstage[stageind] = Unfstgptr;
multunf[stageind++] = str;
multunf[stageind] = NULL;
return (TRUE);
}
/* scan a copy of the phrase backwards for its last blank */
tmpptr = strecpy(ltstr, str) - 1;
while (*tmpptr != ' ' && tmpptr != ltstr)
tmpptr--;
/* the scan stopped on the first character of the copy: give up; */
/* otherwise step on to the first character of the last word     */
if (tmpptr++ == ltstr)
return (FALSE);
strecpy(rtstr, tmpptr);
strecpy(vbstr, endptr);
*tmpptr = '\0'; /* ltstr now ends after the last blank */
*endptr = '\0'; /* str now holds the first word only */
strecpy(tmpstr, str);
stageind = 0;
/* first word: keep only the unflections with a verb relation */
if (unfword(tmpstr, multunf, multstage))
{
while (multunf[stageind] != NULL)
{
/* a singular/plural result is re-labelled with the TP_SI stage */
if (multstage[stageind]->en_rel == SI_PL)
multstage[stageind] = unfstage(TP_SI + 1);
switch (multstage[stageind]->en_rel)
{
case PR_PC:
case PR_PS:
case PR_PP:
case PR_PX:
case TP_SI:
/* copy the root to the output and add the rest of the phrase; */
/* NOTE(review): appndstr() and UNFCODE are defined elsewhere - */
/* presumably this appends vbstr to str; confirm                */
strecpy(str, multunf[stageind]);
appndstr(str, vbstr, UNFCODE);
multunf[stageind++] = str;
/* the next result starts after this one's terminating NUL */
str = strchr(multunf[stageind-1], 0) + 1;
break;
default:
/* not a verb relation: drop the entry; the following entries */
/* move down, so the same index is examined again             */
removunf(&multunf[stageind], &multstage[stageind]);
Unfstgptr = NULL;
}
}
if (stageind)
{
str = strchr(multunf[stageind-1], 0) + 1;
retval = TRUE;
}
}
/* last word: keep only the singular/plural unflections and put the */
/* unchanged left part of the phrase in front of each of them       */
if (unfword(rtstr, &multunf[stageind], &multstage[stageind]))
while (multstage[stageind] != NULL)
{
if (multstage[stageind]->en_rel != SI_PL)
{
removunf(&multunf[stageind], &multstage[stageind]);
continue;
}
tmpptr = strecpy(str, ltstr);
strecpy(tmpptr, multunf[stageind]);
multunf[stageind++] = str;
str = strchr(str, 0) + 1;
retval = TRUE;
}
return (retval);
}
/*
 * Delete the first entry of two parallel lists by moving every
 * following entry down one slot.  Copying stops at the NULL that ends
 * unfptr[] (the NULL itself is moved down as well) or after MAXUNF
 * entries, whichever comes first.  No value is returned.
 */
static removunf(unfptr,stgptr)
char **unfptr;
END **stgptr;
{
    int i;

    for (i = 0; unfptr[i] != NULL && i < MAXUNF; i++)
    {
        unfptr[i] = unfptr[i + 1];
        stgptr[i] = stgptr[i + 1];
    }
}