Remove support for "NoSub" from regex. It seems like a minor optimization
and makes the API more annoying. Add a Regex::getNumMatches() method.


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@82877 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-09-26 21:27:04 +00:00
parent 282098be84
commit 81f46d9ce1
4 changed files with 32 additions and 40 deletions

View File

@ -22,47 +22,42 @@ namespace llvm {
class Regex { class Regex {
public: public:
enum { enum {
/// Compile with support for subgroup matches, this is just to make NoFlags=0,
/// constructs like Regex("...", 0) more readable as Regex("...", Sub).
Sub=0,
/// Compile for matching that ignores upper/lower case distinctions. /// Compile for matching that ignores upper/lower case distinctions.
IgnoreCase=1, IgnoreCase=1,
/// Compile for matching that need only report success or failure,
/// not what was matched.
NoSub=2,
/// Compile for newline-sensitive matching. With this flag '[^' bracket /// Compile for newline-sensitive matching. With this flag '[^' bracket
/// expressions and '.' never match newline. A ^ anchor matches the /// expressions and '.' never match newline. A ^ anchor matches the
/// null string after any newline in the string in addition to its normal /// null string after any newline in the string in addition to its normal
/// function, and the $ anchor matches the null string before any /// function, and the $ anchor matches the null string before any
/// newline in the string in addition to its normal function. /// newline in the string in addition to its normal function.
Newline=4 Newline=2
}; };
/// Compiles the given POSIX Extended Regular Expression \arg Regex. /// Compiles the given POSIX Extended Regular Expression \arg Regex.
/// This implementation supports regexes and matching strings with embedded /// This implementation supports regexes and matching strings with embedded
/// NUL characters. /// NUL characters.
Regex(const StringRef &Regex, unsigned Flags=NoSub); Regex(const StringRef &Regex, unsigned Flags = NoFlags);
~Regex(); ~Regex();
/// isValid - returns the error encountered during regex compilation, or /// isValid - returns the error encountered during regex compilation, or
/// matching, if any. /// matching, if any.
bool isValid(std::string &Error); bool isValid(std::string &Error);
/// getNumMatches - In a valid regex, return the number of parenthesized
/// matches it contains. The number filled in by match will include this
/// many entries plus one for the whole regex (as element 0).
unsigned getNumMatches() const;
/// matches - Match the regex against a given \arg String. /// matches - Match the regex against a given \arg String.
/// ///
/// \param Matches - If given, on a successful match this will be filled in /// \param Matches - If given, on a successful match this will be filled in
/// with references to the matched group expressions (inside \arg String), /// with references to the matched group expressions (inside \arg String),
/// the first group is always the entire pattern. /// the first group is always the entire pattern.
/// By default the regex is compiled with NoSub, which disables support for
/// Matches.
/// For this feature to be enabled you must construct the regex using
/// Regex("...", Regex::Sub) constructor.
/// ///
/// This returns true on a successful match. /// This returns true on a successful match.
bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0); bool match(const StringRef &String, SmallVectorImpl<StringRef> *Matches=0);
private: private:
struct llvm_regex *preg; struct llvm_regex *preg;
int error; int error;
bool sub;
}; };
} }

View File

@ -25,17 +25,16 @@ Regex::Regex(const StringRef &regex, unsigned Flags) {
preg->re_endp = regex.end(); preg->re_endp = regex.end();
if (Flags & IgnoreCase) if (Flags & IgnoreCase)
flags |= REG_ICASE; flags |= REG_ICASE;
if (Flags & NoSub) {
flags |= REG_NOSUB;
sub = false;
} else {
sub = true;
}
if (Flags & Newline) if (Flags & Newline)
flags |= REG_NEWLINE; flags |= REG_NEWLINE;
error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND); error = llvm_regcomp(preg, regex.data(), flags|REG_EXTENDED|REG_PEND);
} }
Regex::~Regex() {
llvm_regfree(preg);
delete preg;
}
bool Regex::isValid(std::string &Error) { bool Regex::isValid(std::string &Error) {
if (!error) if (!error)
return true; return true;
@ -47,19 +46,15 @@ bool Regex::isValid(std::string &Error) {
return false; return false;
} }
Regex::~Regex() { /// getNumMatches - In a valid regex, return the number of parenthesized
llvm_regfree(preg); /// matches it contains.
delete preg; unsigned Regex::getNumMatches() const {
return preg->re_nsub;
} }
bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
unsigned nmatch = Matches ? preg->re_nsub+1 : 0; unsigned nmatch = Matches ? preg->re_nsub+1 : 0;
if (Matches) {
assert(sub && "Substring matching requested but pattern compiled without");
Matches->clear();
}
// pmatch needs to have at least one element. // pmatch needs to have at least one element.
SmallVector<llvm_regmatch_t, 8> pm; SmallVector<llvm_regmatch_t, 8> pm;
pm.resize(nmatch > 0 ? nmatch : 1); pm.resize(nmatch > 0 ? nmatch : 1);
@ -79,6 +74,8 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
// There was a match. // There was a match.
if (Matches) { // match position requested if (Matches) { // match position requested
Matches->clear();
for (unsigned i = 0; i != nmatch; ++i) { for (unsigned i = 0; i != nmatch; ++i) {
if (pm[i].rm_so == -1) { if (pm[i].rm_so == -1) {
// this group didn't match // this group didn't match

View File

@ -25,12 +25,12 @@ TEST_F(RegexTest, Basics) {
EXPECT_FALSE(r1.match("9a")); EXPECT_FALSE(r1.match("9a"));
SmallVector<StringRef, 1> Matches; SmallVector<StringRef, 1> Matches;
Regex r2("[0-9]+", Regex::Sub); Regex r2("[0-9]+");
EXPECT_TRUE(r2.match("aa216b", &Matches)); EXPECT_TRUE(r2.match("aa216b", &Matches));
EXPECT_EQ(1u, Matches.size()); EXPECT_EQ(1u, Matches.size());
EXPECT_EQ("216", Matches[0].str()); EXPECT_EQ("216", Matches[0].str());
Regex r3("[0-9]+([a-f])?:([0-9]+)", Regex::Sub); Regex r3("[0-9]+([a-f])?:([0-9]+)");
EXPECT_TRUE(r3.match("9a:513b", &Matches)); EXPECT_TRUE(r3.match("9a:513b", &Matches));
EXPECT_EQ(3u, Matches.size()); EXPECT_EQ(3u, Matches.size());
EXPECT_EQ("9a:513", Matches[0].str()); EXPECT_EQ("9a:513", Matches[0].str());
@ -43,7 +43,7 @@ TEST_F(RegexTest, Basics) {
EXPECT_EQ("", Matches[1].str()); EXPECT_EQ("", Matches[1].str());
EXPECT_EQ("513", Matches[2].str()); EXPECT_EQ("513", Matches[2].str());
Regex r4("a[^b]+b", Regex::Sub); Regex r4("a[^b]+b");
std::string String="axxb"; std::string String="axxb";
String[2] = '\0'; String[2] = '\0';
EXPECT_FALSE(r4.match("abb")); EXPECT_FALSE(r4.match("abb"));
@ -55,7 +55,7 @@ TEST_F(RegexTest, Basics) {
std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)"; std::string NulPattern="X[0-9]+X([a-f])?:([0-9]+)";
String="YX99a:513b"; String="YX99a:513b";
NulPattern[7] = '\0'; NulPattern[7] = '\0';
Regex r5(NulPattern, Regex::Sub); Regex r5(NulPattern);
EXPECT_FALSE(r5.match(String)); EXPECT_FALSE(r5.match(String));
EXPECT_FALSE(r5.match("X9")); EXPECT_FALSE(r5.match("X9"));
String[3]='\0'; String[3]='\0';

View File

@ -168,7 +168,7 @@ size_t Pattern::Match(StringRef Buffer, size_t &MatchLen) const {
// Regex match. // Regex match.
SmallVector<StringRef, 4> MatchInfo; SmallVector<StringRef, 4> MatchInfo;
if (!Regex(RegExStr, Regex::Sub|Regex::Newline).match(Buffer, &MatchInfo)) if (!Regex(RegExStr, Regex::Newline).match(Buffer, &MatchInfo))
return StringRef::npos; return StringRef::npos;
// Successful regex match. // Successful regex match.