/* Provenance: this file holds the C sources introduced by the commit
 * "New pattern matching module matchpat.[ch]"
 * (3f503a78e5815a2994718305c9a46f99b74d0eee, author cuz,
 * Mon, 8 Apr 2002 19:55:12 +0000,
 * git-svn-id: svn://svn.cc65.org/cc65/trunk@1233
 * b7a2c559-68d2-44c3-8de9-860c34a00d81).
 * That commit creates src/common/matchpat.c and src/common/matchpat.h, adds
 * matchpat.o to OBJS in src/common/make/gcc.mak and adds matchpat.obj to
 * OBJS in src/common/make/watcom.mak.
 *
 * ALTERED SOURCE VERSION - this is not the original software. Changes:
 *   - interface (matchpat.h) and implementation (matchpat.c) are shown as
 *     one translation unit
 *   - <string.h> is included for memset
 *   - character ranges ending in character code 255 no longer loop forever
 *   - escaped characters inside '[...]' are consumed correctly
 *   - runs of '*' are treated like a single '*'
 *   - a NULL Source is treated like an empty string for every Pattern
 *   - a '-' directly in front of the closing ']' is a literal '-'
 *   - the IncPattern/CS_xxx macros were replaced by static functions
 */

/*
 * matchpat.c / matchpat.h
 *
 * Unix shell like pattern matching
 *
 * (C) 2002 Ullrich von Bassewitz
 *          Wacholderweg 14
 *          D-70597 Stuttgart
 * EMail:   uz@musoftware.de
 *
 * This software is provided 'as-is', without any expressed or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgment in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not
 *    be misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source
 *    distribution.
 */



/*****************************************************************************/
/*                       Public interface (matchpat.h)                       */
/*****************************************************************************/



#ifndef MATCHPAT_H
#define MATCHPAT_H



/* matchpat.h includes the project header attrib.h. Nothing declared below
 * uses it, so it is only pulled in when the compiler can tell that it is
 * available.
 */
#if defined(__has_include)
#  if __has_include("attrib.h")
#    include "attrib.h"
#  endif
#endif



int MatchPattern (const char* Source, const char* Pattern);
/* Match the string in Source against Pattern. Pattern may contain the
 * wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. A backslash
 * takes away the special meaning of the character that follows it. The
 * function returns a value of zero if Source does not match Pattern,
 * otherwise a non zero value is returned. If Pattern contains an invalid
 * wildcard pattern (e.g. 'A[x'), the function returns zero. A null pointer
 * passed for Source or Pattern is handled like an empty string.
 */



/* End of matchpat.h */

#endif



/*****************************************************************************/
/*                      Character bit set implementation                     */
/*****************************************************************************/



#include <string.h>



typedef unsigned char CharSet[32];      /* 256 bits, one per character code */



static void CS_Add (unsigned char* CS, unsigned C)
/* Add the character with code C (0..255) to the set */
{
    CS[C >> 3] |= (unsigned char) (1u << (C & 0x07u));
}



static int CS_Contains (const unsigned char* CS, unsigned C)
/* Return a non zero value if the character with code C (0..255) is a member
 * of the set.
 */
{
    return (CS[C >> 3] & (1u << (C & 0x07u))) != 0;
}



static void CS_Invert (unsigned char* CS)
/* Replace the set by its complement */
{
    unsigned I;
    for (I = 0; I < sizeof (CharSet); ++I) {
        CS[I] ^= 0xFF;
    }
}



/*****************************************************************************/
/*                                   Code                                    */
/*****************************************************************************/



/* Escape character */
#define ESCAPE_CHAR     '\\'



static int NextSetChar (const unsigned char** Pattern)
/* Read one member character of a '[...]' set from *Pattern and advance
 * *Pattern behind it. An escape character is skipped and the character
 * following it is returned as is. Returns -1 (and leaves *Pattern alone) if
 * the pattern ends before a character could be read.
 */
{
    const unsigned char* P = *Pattern;
    int C;

    if (*P == ESCAPE_CHAR) {
        ++P;
    }
    if (*P == '\0') {
        return -1;
    }
    C = *P++;
    *Pattern = P;
    return C;
}



static const unsigned char* ParseSet (const unsigned char* Pattern, unsigned char* CS)
/* Parse a character set. Pattern must point to the character following the
 * opening '['. On success, CS contains exactly the characters accepted by
 * the set and the function returns a pointer to the character following the
 * closing ']'. If the set is not terminated, a null pointer is returned.
 */
{
    int Invert = 0;
    int First;
    int Last;
    int C;                      /* int, so that "C <= 255" can become false */

    memset (CS, 0, sizeof (CharSet));

    if (*Pattern == '!') {
        Invert = 1;
        ++Pattern;
    }

    while (*Pattern != ']') {

        /* Read a single member, or the lower bound of a range */
        if ((First = NextSetChar (&Pattern)) < 0) {
            return 0;
        }
        Last = First;

        /* A '-' introduces a range unless it is the last character before
         * the closing bracket, in which case the next loop iteration adds
         * it as a literal.
         */
        if (Pattern[0] == '-' && Pattern[1] != ']') {
            ++Pattern;
            if ((Last = NextSetChar (&Pattern)) < 0) {
                return 0;
            }
        }

        /* A reversed range (First > Last) contributes nothing */
        for (C = First; C <= Last; ++C) {
            CS_Add (CS, (unsigned) C);
        }
    }

    if (Invert) {
        CS_Invert (CS);
    }

    /* Skip ']' */
    return Pattern + 1;
}



static int RecursiveMatch (const unsigned char* Source, const unsigned char* Pattern)
/* A recursive pattern matcher. Returns 1 if all of Source matches all of
 * Pattern and 0 otherwise (including the case of a malformed Pattern).
 * Each '*' that is followed by more pattern costs one level of recursion
 * per candidate position in Source.
 */
{
    CharSet CS;

    for (;;) {

        if (*Pattern == '\0') {

            /* Reached the end of Pattern, what about Source? */
            return (*Source == '\0');

        } else if (*Pattern == '*') {

            /* Several '*' in a row mean the same as one */
            while (*Pattern == '*') {
                ++Pattern;
            }
            if (*Pattern == '\0') {
                /* A trailing '*' is always a match */
                return 1;
            }

            /* The rest of Pattern starts with something that needs at least
             * one character, so trying every non empty tail of Source is
             * sufficient.
             */
            while (*Source != '\0') {
                if (RecursiveMatch (Source, Pattern)) {
                    return 1;
                }
                ++Source;
            }
            return 0;

        } else if (*Source == '\0') {

            /* Pattern needs one more character, but Source is exhausted */
            return 0;

        } else if (*Pattern == '?') {

            /* Any single character is fine */
            ++Pattern;

        } else if (*Pattern == '[') {

            Pattern = ParseSet (Pattern + 1, CS);
            if (Pattern == 0 || !CS_Contains (CS, *Source)) {
                /* Malformed set or no match */
                return 0;
            }

        } else {

            if (*Pattern == ESCAPE_CHAR) {
                /* Use the next char as is. An escape character at the very
                 * end of Pattern is invalid.
                 */
                ++Pattern;
                if (*Pattern == '\0') {
                    return 0;
                }
            }
            if (*Pattern != *Source) {
                return 0;
            }
            ++Pattern;

        }

        /* The current character of Source has been matched */
        ++Source;
    }
}



int MatchPattern (const char* Source, const char* Pattern)
/* Match the string in Source against Pattern. Pattern may contain the
 * wildcards '*', '?', '[abcd]' '[ab-d]', '[!abcd]', '[!ab-d]'. A backslash
 * takes away the special meaning of the character that follows it. The
 * function returns a value of zero if Source does not match Pattern,
 * otherwise a non zero value is returned. If Pattern contains an invalid
 * wildcard pattern (e.g. 'A[x'), the function returns zero. A null pointer
 * passed for Source or Pattern is handled like an empty string.
 */
{
    /* Map null pointers to empty strings, so the matcher never has to
     * dereference one.
     */
    if (Source == 0) {
        Source = "";
    }
    if (Pattern == 0) {
        Pattern = "";
    }

    /* Do the real thing. The matcher works on unsigned chars, because the
     * character codes are used as indices into the bit set.
     */
    return RecursiveMatch ((const unsigned char*) Source, (const unsigned char*) Pattern);
}