cc65/src/common/matchpat.c

241 lines
8.2 KiB
C

/*****************************************************************************/
/* */
/* matchpat.c */
/* */
/* Unix shell like pattern matching */
/* */
/* */
/* */
/* (C) 2002 Ullrich von Bassewitz */
/* Wacholderweg 14 */
/* D-70597 Stuttgart */
/* EMail: uz@musoftware.de */
/* */
/* */
/* This software is provided 'as-is', without any expressed or implied */
/* warranty. In no event will the authors be held liable for any damages */
/* arising from the use of this software. */
/* */
/* Permission is granted to anyone to use this software for any purpose, */
/* including commercial applications, and to alter it and redistribute it */
/* freely, subject to the following restrictions: */
/* */
/* 1. The origin of this software must not be misrepresented; you must not */
/* claim that you wrote the original software. If you use this software */
/* in a product, an acknowledgment in the product documentation would be */
/* appreciated but is not required. */
/* 2. Altered source versions must be plainly marked as such, and must not */
/* be misrepresented as being the original software. */
/* 3. This notice may not be removed or altered from any source */
/* distribution. */
/* */
/*****************************************************************************/
#include <string.h>
/* common */
#include "matchpat.h"
/*****************************************************************************/
/* Character bit set implementation */
/*****************************************************************************/
typedef unsigned char CharSet[32]; /* 256 bits */
/* Clear a character set */
#define CS_CLEAR(CS) memset (CS, 0, sizeof (CharSet))
/* Set all characters in the set */
#define CS_SETALL(CS) memset (CS, 0xFF, sizeof (CharSet))
/* Add one char to the set */
#define CS_ADD(CS, C) ((CS)[(C) >> 3] |= (0x01 << ((C) & 0x07)))
/* Check if a character is a member of the set */
#define CS_CONTAINS(CS, C) ((CS)[(C) >> 3] & (0x01 << ((C) & 0x07)))
/* Invert a character set */
#define CS_INVERT(CS) \
do { \
unsigned I; \
for (I = 0; I < sizeof (CharSet); ++I) { \
CS[I] ^= 0xFF; \
} \
} while (0)
/*****************************************************************************/
/* Code */
/*****************************************************************************/
/* Escape character */
#define ESCAPE_CHAR '\\'
/* Utility macro used in RecursiveMatch */
#define IncPattern() Pattern++; \
if (*Pattern == '\0') { \
return 0; \
}
static int RealChar (const unsigned char* Pattern)
/* Return the next character from Pattern. If the next character is the
** escape character, skip it and return the following.
*/
{
if (*Pattern == ESCAPE_CHAR) {
Pattern++;
return (*Pattern == '\0') ? -1 : *Pattern;
} else {
return *Pattern;
}
}
static int RecursiveMatch (const unsigned char* Source, const unsigned char* Pattern)
/* A recursive pattern matcher */
{
CharSet CS;
while (1) {
if (*Pattern == '\0') {
/* Reached the end of Pattern, what about Source? */
return (*Source == '\0') ? 1 : 0;
} else if (*Pattern == '*') {
if (*++Pattern == '\0') {
/* A trailing '*' is always a match */
return 1;
}
/* Check the rest of the string */
while (*Source) {
if (RecursiveMatch (Source++, Pattern)) {
/* Match! */
return 1;
}
}
/* No match... */
return 0;
} else if (*Source == '\0') {
/* End of Source reached, no match */
return 0;
} else {
/* Check a single char. Build a set of all possible characters in
** CS, then check if the current char of Source is contained in
** there.
*/
CS_CLEAR (CS); /* Clear the character set */
if (*Pattern == '?') {
/* All chars are allowed */
CS_SETALL (CS);
++Pattern; /* Skip '?' */
} else if (*Pattern == ESCAPE_CHAR) {
/* Use the next char as is */
IncPattern ();
CS_ADD (CS, *Pattern);
++Pattern; /* Skip the character */
} else if (*Pattern == '[') {
/* A set follows */
int Invert = 0;
IncPattern ();
if (*Pattern == '!') {
IncPattern ();
Invert = 1;
}
while (*Pattern != ']') {
int C1;
if ((C1 = RealChar (Pattern)) == -1) {
return 0;
}
IncPattern ();
if (*Pattern != '-') {
CS_ADD (CS, C1);
} else {
int C2;
unsigned char C;
IncPattern ();
if ((C2 = RealChar (Pattern)) == -1) {
return 0;
}
IncPattern ();
for (C = C1; C <= C2; C++) {
CS_ADD (CS, C);
}
}
}
/* Skip ']' */
++Pattern;
if (Invert) {
/* Reverse all bits in the set */
CS_INVERT (CS);
}
} else {
/* Include the char in the charset, then skip it */
CS_ADD (CS, *Pattern);
++Pattern;
}
if (!CS_CONTAINS (CS, *Source)) {
/* No match */
return 0;
}
++Source;
}
}
}
int MatchPattern (const char* Source, const char* Pattern)
/* Match the string in Source against Pattern. Pattern may contain the
** wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. The
** function returns a value of zero if Source does not match Pattern,
** otherwise a non zero value is returned. If Pattern contains an invalid
** wildcard pattern (e.g. 'A[x'), the function returns zero.
*/
{
/* Handle the trivial cases */
if (Pattern == 0 || *Pattern == '\0') {
return (Source == 0 || *Source == '\0');
}
/* Do the real thing */
return RecursiveMatch ((const unsigned char*) Source, (const unsigned char*) Pattern);
}