/***********************************************************************
 * Filename:  unflect.c
 *
 * Suffix-stripping "unflection": given an inflected word or phrase,
 * produce its candidate root form(s) together with the Stage[] entry
 * describing the root -> inflection relationship.
 *
 * Helpers used here but defined elsewhere (behavior inferred from the
 * way they are called in this file; confirm against their definitions):
 *   unftry(s)          - nonzero when the candidate s is accepted.
 *   unfexc(in,out,rel) - exception lookup; returns ERROR when the word
 *                        is not an exception, otherwise a relation
 *                        index with the result written to out.
 *   strecpy(dst,src)   - copies src to dst; its return value is used as
 *                        a pointer to the terminating NUL of dst.
 *   appndstr(...)      - appends the remainder of a phrase.
 ***********************************************************************/
#include "thesmisc.h"
/* NOTE(review): the operand of this #include was lost in the copy of the
 * file under review.  <string.h> is assumed because strchr(), strcmp()
 * and strlen() are used below - confirm against the original source. */
#include <string.h>

#define VMASK	0x1104111		/* bit (c - 'a') set for a e i o u y */

/* True when c is one of "aeiouy".  The range test keeps the shift count
 * within 0..25; shifting by a negative or oversized count is undefined. */
#define isvowel(c) \
    ((c) >= 'a' && (c) <= 'z' && ((1L << ((c) - 'a')) & VMASK) != 0)

static int unfword();
static END *Unfstgptr;		/* Stage[] entry selected for the current word */
static END *cutoff1();
static int uplural();
static int udo_ness();
static int udo_ly();
static int unormend();
static int udo_ment();
static int udo_able();
static int chkmultunf();
static int multunftry();
static int unfphrase();
static void removunf();

/* The switch in unfword() uses the third member (en_class) to choose   */
/* the stripping routine; use this member when adding more endings.     */
/* The ordering of endings is crucial: cutoff1() returns the first      */
/* entry whose ending matches, so order determines precedence.          */
/* The TH_ initialization gives the part of speech of the root form     */
/* and the fifth member is the relationship between the root and its    */
/* inflection.                                                          */
static END Stage[] = {
    {"ness",  4,  2, TH_ADJ|TH_ADJSP,            AJ_NN},
    {"ment",  4,  4, TH_VERB|TH_VERBSP,          VB_NN},
    {"able",  4,  5, TH_VERB|TH_VERBSP,          VB_AJ},
    {"ible",  4,  5, TH_VERB|TH_VERBSP,          VB_AJ},
    {"ments", 5,  4, TH_VERB|TH_VERBSP,          VB_NN},
    {"s",     1,  0, TH_NOUN|TH_VERB|TH_VERBSP,  SI_PL},
    {"s",     1,  0, TH_NOUN,                    SI_PL},
    {"s",     1,  0, TH_SPNOUN,                  SP_PL},
    {"s",     1,  0, TH_VERB|TH_VERBSP,          TP_SI},
    {"a",     1,  7, TH_NOUN,                    SI_PL},
    {"men",   3,  6, TH_NOUN,                    SI_PL},
    {"i",     1,  8, TH_NOUN,                    SI_PL},
    {"x",     1,  9, TH_NOUN,                    SI_PL},
    {"ly",    2,  1, TH_ADJ|TH_ADJSP,            AJ_AV},
    {"ing",   3,  3, TH_VERB|TH_VERBSP,          PR_PC},
    {"ed",    2,  3, TH_VERB,                    PR_PX},
    {"ed",    2,  3, TH_VERBSP,                  SV_PX},
    {"ed",    2,  3, TH_VERB,                    PR_PS},
    {"ed",    2,  3, TH_VERB,                    PR_PP},
    {"er",    2,  3, TH_ADJ,                     RE_CM},
    {"est",   3,  3, TH_ADJ,                     RE_SU},
    {"are",   3, 10, TH_VERB,                    TP_SP},
    {"am",    2, 10, TH_VERB,                    TP_FP},
    {NULL,    0,  0, 0,                          DN_CR}
};

/*
 * unflect - unflect a word or a phrase.
 *
 * str       : string to unflect; it is overwritten with the result(s).
 * multunf   : receives pointers to the unflected strings.
 * multstage : receives the matching unflection stages.
 *
 * A string containing a blank is handled as a phrase, anything else as
 * a single word.  Returns the result of unfphrase()/unfword().
 */
int unflect(str, multunf, multstage)
char	*str;			/* string to unflect */
char	*multunf[];		/* pointers to unflected strings */
END	*multstage[];		/* unflection stages */
{
    char *endptr;

    /* find the first blank, if any */
    endptr = str;
    while (*endptr && *endptr != ' ')
	++endptr;

    if (*endptr == ' ')
	return (unfphrase(str, endptr, multunf, multstage));
    else
	return (unfword(str, multunf, multstage));
}

/*
 * unfword - unflect a single word in place.
 *
 * The exception list is consulted first; otherwise the ending is matched
 * against Stage[] and the routine selected by en_class is applied.
 * On success the candidates are recorded in multunf[]/multstage[], which
 * are both NULL-terminated at the same index.  Returns nonzero when at
 * least one unflection was produced.
 */
static int unfword(str, multunf, multstage)
char	*str;			/* string to unflect */
char	*multunf[];		/* pointers to unflected strings */
END	*multstage[];		/* unflection stages */
{
    END *cutoff1();
    END *unfstage();
    char *endptr;
    int infindex;
    char *tmpptr;
    int stageind;
    int tmpind;
    char dummy[MAXWORD];
    char tmpstr[MAXWORD];

    strecpy(tmpstr, str);		/* keep a copy of the inflected word */
    stageind = 0;
    endptr = str;

    /* see if the word is an exception */
    /* if a singular to plural is indicated, it is for a noun */
    if ((infindex = unfexc(str, dummy, UNF_IR)) != ERROR) {
	if (infindex == 0)
	    return (FALSE);
	strecpy(str, dummy);
	if (infindex == PR_PS + 1) {
	    if (unfexc(tmpstr, tmpstr, PR_PP + 1) > 0)
		infindex = PR_PX + 1;
	}
	if ((Unfstgptr = unfstage(infindex)) == NULL)
	    return (FALSE);
	endptr = strchr(str, 0);
	stageind = chkmultunf(str, multstage, multunf, stageind);
	tmpptr = strchr(tmpstr, 0) - 1;
	if ((infindex == SI_PL + 1 || infindex == SP_PL + 1)
	    && *tmpptr == 's') {
	    /* the word also ends in 's': try the regular plural rule too, */
	    /* recorded under a neighbouring "s" entry of Stage[]          */
	    *tmpptr = '\0';
	    if (infindex == SI_PL + 1)
		Unfstgptr += 2;
	    else
		Unfstgptr -= 2;
	    if (uplural(tmpstr, tmpptr)) {
		endptr++;		/* store just past the existing results */
		strecpy(endptr, tmpstr);
		multunf[stageind] = endptr;
		multstage[stageind++] = Unfstgptr;
	    }
	}
	/* terminate BOTH parallel arrays: unfphrase() scans multunf[] in */
	/* one loop and multstage[] in the other                          */
	multunf[stageind] = NULL;
	multstage[stageind] = NULL;
	return (TRUE);
    }

    if ((Unfstgptr = cutoff1(endptr = strchr(str, 0))) == NULL)
	return (FALSE);
    endptr -= Unfstgptr->en_len;
    *endptr = '\0';			/* strip the ending */

    switch (Unfstgptr->en_class) {
    case 0:				/* s */
	if (!uplural(str, endptr))
	    return (FALSE);
	/* a plural form can be third person singular of a verb */
	/* if the noun plural is in the exceptions, it can only */
	/* be a verb. */
	if ((infindex = unfexc(str, dummy, SI_PL)) > 0)
	    Unfstgptr = unfstage(TP_SI + 1);
	break;

    case 1:				/* ly */
	if (!udo_ly(str, endptr))
	    return (FALSE);
	break;

    case 2:				/* ness */
	if (!udo_ness(str, endptr))
	    return (FALSE);
	break;

    case 3:				/* ing, ed, er, est */
	if (!unormend(str, endptr))
	    return (FALSE);
	break;

    case 4:				/* ment, ments */
	if (*(strchr((char *) Unfstgptr->en_end, 0) - 1) != 's') {
	    if (!udo_ment(str, endptr))
		return (FALSE);
	    break;
	}
	/* check for both pluralizing relation and verb -> noun */
	strecpy(dummy, str);			/* stem without "ments"  */
	endptr = strecpy(str, tmpstr) - 1;	/* restore the full word */
	*endptr = '\0';				/* ... and drop the 's'  */
	strecpy(tmpstr, dummy);
	if (unftry(str)) {
	    multstage[stageind] = unfstage(SI_PL + 1);
	    multunf[stageind++] = str;
	    endptr = strchr(endptr, 0) + 1;	/* next result goes after it */
	}
	else
	    endptr = str;
	tmpptr = strchr(tmpstr, 0);
	if (udo_ment(tmpstr, tmpptr)) {
	    strecpy(endptr, tmpstr);
	    multunf[stageind] = endptr;
	    multstage[stageind++] = Unfstgptr;
	    endptr = strchr(endptr, 0);
	}
	*endptr = '\0';
	/* terminate both parallel arrays (see the exception path above) */
	multunf[stageind] = NULL;
	multstage[stageind] = NULL;
	return (*str != '\0');

    case 5:				/* able, ible */
	if (!udo_able(str, endptr))
	    return (FALSE);
	break;

    case 6:
	strecpy(endptr, "man");		/* men -> man */
	if (!unftry(str))
	    return (FALSE);
	break;

    case 7:
	strecpy(endptr, "um");		/* stadia -> stadium */
	if (!unftry(str))
	    return (FALSE);
	break;

    case 8:
	strecpy(endptr, "us");		/* syllabi -> syllabus */
	if (!unftry(str))
	    return (FALSE);
	break;

    case 9:
	if (endptr[-1] != 'u' || !unftry(str))
	    return (FALSE);		/* plateaux -> plateau */
	break;

    case 10:
	if (*str != '\0')		/* am/are -> be */
	    return (FALSE);
	strecpy(str, "be");
	break;

    default:
	return (FALSE);
    }

    tmpind = chkmultunf(str, multstage, multunf, stageind);
    /* terminate both parallel arrays so that removunf() shifts a NULL */
    /* into multstage[] as well as into multunf[]                      */
    multunf[tmpind] = NULL;
    multstage[tmpind] = NULL;

    /* remove unflections that are derived from applying the */
    /* rules if the root form can be derived from the exceptions */
    if (!stageind)
	while (stageind < tmpind) {
	    infindex = unfexc(multunf[stageind], tmpstr, Unfstgptr->en_rel);
	    if (infindex == ERROR && Unfstgptr->en_rel == PR_PX)
		infindex = unfexc(multunf[stageind], tmpstr, PR_PS);
	    if (infindex > 0) {
		tmpind--;
		removunf(&multunf[stageind], &multstage[stageind]);
		continue;
	    }
	    stageind++;
	}
    return (multunf[0] != NULL);
}

/*
 * cutoff1 - find the first Stage[] entry whose ending matches the tail
 * of the word.  endptr points at the word's terminating NUL.  Returns
 * the entry, or NULL when no ending matches.
 *
 * NOTE(review): the start of the word is not known here, so for a word
 * shorter than en_len the comparison begins before the word's first
 * character.
 */
static END *cutoff1(endptr)
char *endptr;
{
    END *chkptr;

    chkptr = Stage;
    while (chkptr->en_end) {
	if (!strcmp(endptr - chkptr->en_len, (char *) chkptr->en_end))
	    return (chkptr);
	chkptr++;
    }
    return (NULL);
}

/*
 * uplural - undo an "s" ending.  On entry the 's' has already been
 * replaced by NUL and endptr points at that NUL.  Candidate roots are
 * built in place and offered to unftry(); returns nonzero on success.
 *
 * NOTE(review): endptr[-2] and endptr[-3] are read without checking that
 * the stem is that long.
 */
static int uplural(str, endptr)
char *str;
char *endptr;
{
    if (endptr[-1] != 'e') {
	if (endptr[-1] == 's') {	/* kiss -> failure */
	    *endptr++ = 's';		/* put the 's' back */
	    *endptr = '\0';
	    return (FALSE);
	}
	return (unftry(str));
    }

    /* the stem ends in 'e': the plural may have been formed with "es" */
    switch (endptr[-2]) {
    case 'o':
	endptr[-1] = '\0';
	if (unftry(str))		/* does -> do */
	    return (TRUE);
	endptr[-1] = 'e';
	break;

    case 'i':
	endptr[-2] = 'y';		/* flies -> fly */
	endptr[-1] = '\0';
	if (unftry(str))
	    return (TRUE);
	endptr[-2] = 'i';		/* brownies -> brownie */
	endptr[-1] = 'e';
	return (unftry(str));

    case 'h':
	if (endptr[-3] != 't') {	/* clothes -> cloth */
	    endptr[-1] = '\0';
	    if (unftry(str))
		return (TRUE);
	    endptr[-1] = 'e';
	}
	return (unftry(str));		/* ache -> aches */

    case 'x':
	endptr[-1] = '\0';		/* boxes -> box */
	return (unftry(str));

    case 's':				/* classes -> class */
	if (endptr[-3] == endptr[-2]) {
	    endptr[-1] = '\0';		/* busses -> buss */
	    if (unftry(str))
		return (TRUE);
	    endptr[-2] = '\0';
	    return (unftry(str));
	}
	*endptr = '\0';
	if (unftry(str))		/* noses -> nose */
	    return (TRUE);
	endptr[-1] = 'i';
	*endptr = 's';
	endptr[1] = '\0';
	if (unftry(str))		/* theses -> thesis */
	    return (TRUE);
	endptr[-1] = '\0';
	return (unftry(str));		/* proboscises -> proboscis */

    case 'z':
	if (endptr[-3] == endptr[-2]) {
	    endptr -= 2;
	    *endptr = '\0';		/* whizzes -> whizz/whiz */
	    return (multunftry(str, endptr, "z", ""));
	}
	endptr[-1] = '\0';
	if (unftry(str))
	    return (TRUE);
	endptr[-1] = 'e';
	break;

    case 'v':
	if (unftry(str))		/* shelves -> shelve */
	    return (TRUE);
	endptr[-2] = 'f';		/* halves -> half */
	endptr[-1] = '\0';
	if (unftry(str))
	    return (TRUE);
	endptr[-1] = 'e';		/* knives -> knife */
	break;

    case 'c':
	if (unftry(str))		/* vices -> vice */
	    return (TRUE);
	endptr[-2] = 'x';
	endptr[-1] = '\0';
	if (unftry(str))		/* matrices -> matrix */
	    return (TRUE);
	endptr[-3] = 'e';
	return (unftry(str));		/* vertices -> vertex */

    default:
	break;
    }
    *endptr = '\0';
    return (unftry(str));
}

/*
 * udo_ness - undo a "ness" ending.  endptr points at the NUL that
 * replaced the ending.  Returns nonzero when unftry() accepts a root.
 */
static int udo_ness(str, endptr)
char *str;
char *endptr;
{
    if (endptr[-1] == 'i') {		/* readiness -> ready */
	endptr[-1] = 'y';
	if (unftry(str))
	    return (TRUE);
	endptr[-1] = 'i';
    }
    return (unftry(str));
}

/*
 * udo_ly - undo an "ly" ending.  endptr points at the NUL that replaced
 * the ending.  Returns nonzero when a root was accepted; the "-cally"
 * case may leave two candidates in str (see multunftry()).
 */
static int udo_ly(str, endptr)
char *str;
char *endptr;
{
    switch (endptr[-1]) {
    case 'i':
	endptr[-1] = 'y';		/* readily -> ready */
	return (unftry(str));

    case 'l':
	/* accept more than one possible unflection */
	/* e.g. analytically -> analytical/analytic */
	if (endptr[-2] == 'a' && endptr[-3] == 'c') {
	    endptr -= 2;
	    *endptr = '\0';
	    return (multunftry(str, endptr, "al", ""));
	}
	*endptr = 'l';
	endptr[1] = '\0';
	if (unftry(str))		/* drolly -> droll */
	    return (TRUE);
	break;

    case 'b':
	strecpy(endptr, "le");		/* crumbly -> crumble */
	if (unftry(str))
	    return (TRUE);
	break;

    default:
	break;
    }
    *endptr = '\0';
    return (unftry(str));		/* ly -> */
}

/*
 * unormend - undo "ing", "ed", "er" and "est".  endptr points at the
 * NUL that replaced the ending; Unfstgptr identifies which ending was
 * cut.  Returns nonzero when at least one root was accepted; str may
 * then hold two candidates (see multunftry()).
 *
 * A comparative is accepted only when the matching superlative is also
 * accepted by unftry(), and vice versa.
 *
 * NOTE(review): endptr[-2] is read without checking that the stem has
 * two characters.
 */
static int unormend(str, endptr)
char *str;
char *endptr;
{
    char end1[MAXWORD];
    char end2[MAXWORD];

    if (Unfstgptr->en_rel == RE_CM) {
	strecpy(endptr, "est");
	if (!unftry(str))
	    return (FALSE);
    }
    else if (Unfstgptr->en_rel == RE_SU) {
	strecpy(endptr, "er");
	if (!unftry(str))
	    return (FALSE);
    }
    *endptr = '\0';

    switch (endptr[-1]) {
    case 'i':
	endptr[-1] = 'y';		/* angriest -> angry */
	if (unftry(str))
	    return (TRUE);
	endptr[-1] = 'i';
	*endptr++ = 'e';
	*endptr = '\0';			/* tied -> tie */
	return (unftry(str));

    case 'e':
	if (endptr[-2] == 'e')		/* seeing -> see */
	    return (unftry(str));
	if (!(Unfstgptr->en_pos & TH_ADJ))
	    if (unftry(str))		/* dieing -> die */
		return (TRUE);
	*endptr++ = 'e';		/* bared -> bare */
	*endptr = '\0';
	return (unftry(str));

    case 'u':
	*endptr++ = 'e';
	*endptr = '\0';
	if (unftry(str))		/* arguing -> argue */
	    return (TRUE);
	endptr[-1] = '\0';
	return (unftry(str));

    case 'y':
	if (Unfstgptr->en_rel == PR_PC) {
	    /* "-ying": try the stem with "ie" and with "y" */
	    strecpy(end1, (char *)"ie");
	    end2[0] = 'y';
	    end2[1] = '\0';
	    *--endptr = '\0';
	    if (multunftry(str, endptr, end1, end2))
		return (TRUE);
	    /* NOTE(review): when multunftry() fails it leaves str empty, */
	    /* so the retry below appears unable to succeed - confirm.    */
	    *endptr++ = 'y';
	}
	end2[0] = 'e';
	end1[0] = end2[1] = '\0';
	return (multunftry(str, endptr, end1, end2));

    case 'o':
	if (Unfstgptr->en_pos & (TH_VERB | TH_VERBSP)) {
	    end2[0] = 'e';
	    end1[0] = end2[1] = '\0';
	    return (multunftry(str, endptr, end1, end2));
	}
	/* FALLTHROUGH - a non-verb stem in 'o' is handled like 'a' */
    case 'a':
	return (unftry(str));

    case 'h':
	if (endptr[-2] == 't'
	    && (Unfstgptr->en_pos & (TH_VERB | TH_VERBSP))) {
	    *endptr++ = 'e';
	    *endptr = '\0';
	    if (unftry(str))		/* bathing -> bathe */
		return (TRUE);
	    endptr--;
	    *endptr = '\0';
	}
	if (unftry(str))		/* highest -> high */
	    return (TRUE);
	*endptr++ = 'e';		/* lithest -> lithe */
	*endptr = '\0';
	return (unftry(str));

    case 'k':
	if (endptr[-2] == 'c') {
	    if (unftry(str))		/* packing -> pack */
		return (TRUE);
	    endptr[-1] = '\0';
	    return (unftry(str));	/* panicking -> panic */
	}
	break;

    case 'b':
    case 'd':
    case 'f':
    case 'g':
    case 'l':
    case 'm':
    case 'n':
    case 'p':
    case 't':
	if (isvowel(endptr[-2]) && strlen((char *)str) < 4) {
	    /* short stem: if the doubled-consonant "-ing" form is */
	    /* accepted, the root is taken to be stem + "e"         */
	    *endptr = endptr[-1];
	    strecpy(endptr + 1, "ing");
	    if (unftry(str)) {
		*endptr = 'e';
		endptr[1] = '\0';
		return (unftry(str));	/* bated -> bate */
	    }
	    *endptr = '\0';
	}
	break;

    default:
	break;
    }

    /* accept more than one possible unflection */
    /* e.g. bussing -> buss/bus, lapping -> lapp/lap */
    if (endptr[-1] == endptr[-2]) {
	end1[0] = endptr[-1];
	end2[0] = end1[1] = '\0';
	*--endptr = '\0';
	return (multunftry(str, endptr, end1, end2));
    }
    end1[0] = 'e';
    end1[1] = end2[0] = '\0';		/* casting -> caste/cast */
    return (multunftry(str, endptr, end1, end2));
}

/*
 * udo_ment - undo a "ment" ending.  endptr points at the NUL that
 * replaced the ending.  A stem ending in 'a' is rejected.  Returns
 * nonzero when unftry() accepts the root.
 */
static int udo_ment(str, endptr)
char *str;
char *endptr;
{
    if (endptr[-1] != 'a') {
	if (endptr[-1] == 'g' && endptr[-2] == 'd') {
	    *endptr++ = 'e';
	    *endptr = '\0';		/* acknowledgment -> -ledge */
	    return (unftry(str));
	}
	return (unftry(str));
    }
    return (FALSE);
}

/*
 * udo_able - undo an "able"/"ible" ending.  endptr points at the NUL
 * that replaced the ending.  Returns nonzero when a root was accepted;
 * the final case may leave two candidates in str (see multunftry()).
 */
static int udo_able(str, endptr)
char *str;
char *endptr;
{
    /* doubled final letter */
    if (endptr[-1] == endptr[-2]) {
	if (unftry(str))
	    return (TRUE);
	if (strchr("bdfglmnprt", (int) endptr[-1])) {
	    endptr[-1] = '\0';		/* drop the doubled consonant */
	    return (unftry(str));
	}
	if (endptr[-2] == 's') {
	    endptr[-2] = 't';
	    endptr[-1] = '\0';
	    if (unftry(str))		/* permissible -> permit */
		return (TRUE);
	}
	return (FALSE);
    }

    if (endptr[-1] == 'i') {
	strecpy(endptr, "ate");
	if (unftry(str))		/* satiable -> satiate */
	    return (TRUE);
	endptr[-1] = 'y';
	*endptr = '\0';
	return (unftry(str));
    }

    if (endptr[-1] == 's') {
	if (endptr[-2] == 'n') {	/* defensible -> defend */
	    endptr[-1] = 'd';
	    *endptr = '\0';
	    if (unftry(str))
		return (TRUE);
	    endptr[-1] = 's';
	}
	*endptr = 'e';
	endptr[1] = '\0';
	return (unftry(str));
    }

    if (strchr("cglnr", (int) endptr[-1])) {
	strecpy(endptr, "ate");		/* venerable -> venerate */
	if (unftry(str))
	    return (TRUE);
	*endptr = '\0';
	if (unftry(str))
	    return (TRUE);
    }

    if (endptr - str >= 3) {
	return (multunftry(str, endptr, "e", ""));
    }
    else
	return (unftry(str));
}

/*
 * unfstage - map a relation index (as returned by unfexc(), i.e. the
 * relation code plus one) to its Stage[] entry.  For SI_PL the entry
 * following the first match is returned.  Returns NULL when no entry
 * carries the relation.
 */
END *unfstage(index)
int index;
{
    int i;

    /* the index to search is root->inflection */
    /* so subtract 1 */
    --index;
    for (i = 0; Stage[i].en_end != NULL; i++)
	if (Stage[i].en_rel == index)
	    return (index == SI_PL ? &Stage[i + 1] : &Stage[i]);
    return NULL;
}

/*
 * chkmultunf - check the string for multiple unflections.
 *
 * str holds one or more candidates separated by UNFSEP.  Each separator
 * is overwritten with NUL and every candidate that differs from the
 * first is recorded in multunf[]/stage[] starting at index ind, all
 * with the current Unfstgptr.  Returns the index following the last
 * entry written, or FALSE (0) when str is empty.
 */
static int chkmultunf(str, stage, multunf, ind)
char *str;
END *stage[];
char *multunf[];
int ind;
{
    char *first;

    if (*(first = str) == '\0')
	return (FALSE);
    stage[ind] = Unfstgptr;
    multunf[ind++] = first;

    /* if a multiple unflection is indicated, duplicate the */
    /* stageptr for the unflected forms */
    while (*first != '\0')
	if (*first++ == UNFSEP) {
	    *(first - 1) = '\0';
	    if (strcmp(str, first) != 0) {
		stage[ind] = stage[ind - 1];
		multunf[ind++] = first;
	    }
	}
    return (ind);
}

/*
 * multunftry - try two alternative endings on the same stem.
 *
 * endptr points at the end of the stem inside str.  stem+end1 and
 * stem+end2 are each offered to unftry(); the accepted ones are written
 * back to str, separated by UNFSEP when both are accepted.  str is left
 * empty when neither is accepted.  Returns nonzero when str is
 * non-empty.
 *
 * NOTE(review): tmpstr (MAXWORD bytes) and str must be able to hold two
 * candidates plus a separator; no length check is made.
 */
static int multunftry(str, endptr, end1, end2)
char *str;
char *endptr;
char *end1;
char *end2;
{
    char *tmpptr;
    char tmpstr[MAXWORD];

    /* first alternative, built in the scratch buffer */
    tmpptr = strecpy(strecpy(tmpstr, str), end1);
    if (!unftry(tmpstr))
	*(tmpptr = tmpstr) = '\0';

    /* second alternative, built in place */
    strecpy(endptr, end2);
    if (unftry(str)) {
	if (tmpptr != tmpstr)
	    *tmpptr++ = UNFSEP;
	strecpy(tmpptr, str);
    }
    strecpy(str, tmpstr);
    return (*str != '\0');
}

/* unflect a phrase.  If the phrase is an exception, return the    */
/* unflected phrase.  Otherwise, attempt unflection of the first   */
/* word as a verb and the last word as a noun.                     */
/*                                                                 */
/* endptr points at the first blank in str.  The results are       */
/* written back into str one after another and recorded in         */
/* multunf[]/multstage[], both NULL-terminated.  Returns nonzero   */
/* when at least one unflected phrase was produced.                */
static int unfphrase(str, endptr, multunf, multstage)
char *str;
char *endptr;
char *multunf[];
END *multstage[];
{
    char *tmpptr;
    int infindex;
    int stageind;
    int retval;
    char rtstr[2 * MAXWORD];	/* last word of the phrase              */
    char ltstr[MAXWORD];	/* everything up to the last word       */
    char tmpstr[MAXWORD];
    char vbstr[MAXWORD];	/* everything from the first blank on   */

    retval = FALSE;
    stageind = 0;

    /* the whole phrase may be an exception */
    if ((infindex = unfexc(str, tmpstr, UNF_IR)) != ERROR) {
	if (infindex == 0)
	    return (FALSE);
	if ((Unfstgptr = unfstage(infindex)) == NULL)
	    return (FALSE);
	strecpy(str, tmpstr);
	multstage[stageind] = Unfstgptr;
	multunf[stageind++] = str;
	/* terminate both parallel arrays */
	multunf[stageind] = NULL;
	multstage[stageind] = NULL;
	return (TRUE);
    }

    /* locate the last blank of the phrase */
    tmpptr = strecpy(ltstr, str) - 1;
    while (*tmpptr != ' ' && tmpptr != ltstr)
	tmpptr--;
    if (tmpptr++ == ltstr)
	return (FALSE);
    strecpy(rtstr, tmpptr);
    strecpy(vbstr, endptr);
    *tmpptr = '\0';		/* ltstr keeps its trailing blank */
    *endptr = '\0';		/* str is now the first word only */
    strecpy(tmpstr, str);

    /* first word as a verb */
    stageind = 0;
    if (unfword(tmpstr, multunf, multstage)) {
	while (multunf[stageind] != NULL) {
	    /* a plain plural is read as third person singular */
	    if (multstage[stageind]->en_rel == SI_PL)
		multstage[stageind] = unfstage(TP_SI + 1);
	    switch (multstage[stageind]->en_rel) {
	    case PR_PC:
	    case PR_PS:
	    case PR_PP:
	    case PR_PX:
	    case TP_SI:
		strecpy(str, multunf[stageind]);
		appndstr(str, vbstr, UNFCODE);
		multunf[stageind++] = str;
		str = strchr(multunf[stageind-1], 0) + 1;
		break;
	    default:
		/* not a verb relation: discard this candidate */
		removunf(&multunf[stageind], &multstage[stageind]);
		Unfstgptr = NULL;
	    }
	}
	if (stageind) {
	    str = strchr(multunf[stageind-1], 0) + 1;
	    retval = TRUE;
	}
    }

    /* last word as a noun */
    if (unfword(rtstr, &multunf[stageind], &multstage[stageind]))
	while (multstage[stageind] != NULL) {
	    if (multstage[stageind]->en_rel != SI_PL) {
		removunf(&multunf[stageind], &multstage[stageind]);
		continue;
	    }
	    tmpptr = strecpy(str, ltstr);
	    strecpy(tmpptr, multunf[stageind]);
	    multunf[stageind++] = str;
	    str = strchr(str, 0) + 1;
	    retval = TRUE;
	}

    /* leave both parallel arrays terminated after the kept entries */
    multunf[stageind] = NULL;
    multstage[stageind] = NULL;
    return (retval);
}

/*
 * removunf - delete the first entry of two parallel NULL-terminated
 * arrays by shifting the following entries down one slot.  At most
 * MAXUNF entries are moved.
 */
static void removunf(unfptr, stgptr)
char **unfptr;
END **stgptr;
{
    int i;

    i = 0;
    /* test the bound before reading the element */
    while (i < MAXUNF && unfptr[i] != NULL) {
	unfptr[i] = unfptr[i + 1];
	stgptr[i] = stgptr[i + 1];
	i++;
    }
}