2009-08-30 08:24:09 +00:00
|
|
|
//===-- Regex.h - Regular Expression matcher implementation -*- C++ -*-----===//
|
|
|
|
//
|
|
|
|
// The LLVM Compiler Infrastructure
|
|
|
|
//
|
|
|
|
// This file is distributed under the University of Illinois Open Source
|
|
|
|
// License. See LICENSE.TXT for details.
|
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
//
|
Add backreference matching capabilities to Support/Regex, with
appropriate unit tests. This change in itself is not expected to
affect any functionality at this point, but it will serve as a
stepping stone to improve FileCheck's variable matching capabilities.
Luckily, our regex implementation already supports backreferences,
although a bit of hacking is required to enable it. It supports both
Basic Regular Expressions (BREs) and Extended Regular Expressions
(EREs), without supporting backrefs for EREs, following POSIX strictly
in this respect. And EREs is what we actually use (rightly). This is
contrary to many implementations (including the default on Linux) of
POSIX regexes, that do allow backrefs in EREs.
Adding backref support to our EREs is a very simple change in the
regcomp parsing code. I fail to think of significant cases where it
would clash with existing things, and can bring more versatility to
the regexes we write. There's always the danger of a backref in a
specially crafted regex causing exponential matching times, but since
we mainly use them for testing purposes I don't think it's a big
problem. [it can also be placed behind a flag specific to FileCheck,
if needed].
For more details, see:
* http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055840.html
* http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20121126/156878.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168802 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-28 19:00:02 +00:00
|
|
|
// This file implements a POSIX regular expression matcher. Both Basic and
|
|
|
|
// Extended POSIX regular expressions (ERE) are supported. EREs were extended
|
|
|
|
// to support backreferences in matches.
|
|
|
|
// This implementation also supports matching strings with embedded NUL chars.
|
2009-08-30 08:24:09 +00:00
|
|
|
//
|
|
|
|
//===----------------------------------------------------------------------===//
|
|
|
|
|
2010-08-20 17:38:44 +00:00
|
|
|
#ifndef LLVM_SUPPORT_REGEX_H
|
|
|
|
#define LLVM_SUPPORT_REGEX_H
|
|
|
|
|
2014-01-02 19:04:59 +00:00
|
|
|
#include "llvm/Support/Compiler.h"
|
2009-09-24 21:47:32 +00:00
|
|
|
#include <string>
|
2009-08-30 08:24:09 +00:00
|
|
|
|
|
|
|
struct llvm_regex;
|
2009-09-24 21:47:32 +00:00
|
|
|
|
2009-08-30 08:24:09 +00:00
|
|
|
namespace llvm {
|
2009-09-24 21:47:32 +00:00
|
|
|
class StringRef;
|
|
|
|
template<typename T> class SmallVectorImpl;
|
2010-08-20 17:38:38 +00:00
|
|
|
|
2009-08-30 08:24:09 +00:00
|
|
|
class Regex {
|
|
|
|
public:
|
|
|
|
enum {
|
2009-09-26 21:27:04 +00:00
|
|
|
NoFlags=0,
|
2009-08-30 08:24:09 +00:00
|
|
|
/// Compile for matching that ignores upper/lower case distinctions.
|
|
|
|
IgnoreCase=1,
|
|
|
|
/// Compile for newline-sensitive matching. With this flag '[^' bracket
|
2010-08-20 17:38:38 +00:00
|
|
|
/// expressions and '.' never match newline. A ^ anchor matches the
|
|
|
|
/// null string after any newline in the string in addition to its normal
|
|
|
|
/// function, and the $ anchor matches the null string before any
|
2009-08-30 08:24:09 +00:00
|
|
|
/// newline in the string in addition to its normal function.
|
Add backreference matching capabilities to Support/Regex, with
appropriate unit tests. This change in itself is not expected to
affect any functionality at this point, but it will serve as a
stepping stone to improve FileCheck's variable matching capabilities.
Luckily, our regex implementation already supports backreferences,
although a bit of hacking is required to enable it. It supports both
Basic Regular Expressions (BREs) and Extended Regular Expressions
(EREs), without supporting backrefs for EREs, following POSIX strictly
in this respect. And EREs is what we actually use (rightly). This is
contrary to many implementations (including the default on Linux) of
POSIX regexes, that do allow backrefs in EREs.
Adding backref support to our EREs is a very simple change in the
regcomp parsing code. I fail to think of significant cases where it
would clash with existing things, and can bring more versatility to
the regexes we write. There's always the danger of a backref in a
specially crafted regex causing exponential matching times, but since
we mainly use them for testing purposes I don't think it's a big
problem. [it can also be placed behind a flag specific to FileCheck,
if needed].
For more details, see:
* http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055840.html
* http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20121126/156878.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168802 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-28 19:00:02 +00:00
|
|
|
Newline=2,
|
|
|
|
/// By default, the POSIX extended regular expression (ERE) syntax is
|
|
|
|
/// assumed. Pass this flag to turn on basic regular expressions (BRE)
|
|
|
|
/// instead.
|
|
|
|
BasicRegex=4
|
2009-08-30 08:24:09 +00:00
|
|
|
};
|
|
|
|
|
Add backreference matching capabilities to Support/Regex, with
appropriate unit tests. This change in itself is not expected to
affect any functionality at this point, but it will serve as a
stepping stone to improve FileCheck's variable matching capabilities.
Luckily, our regex implementation already supports backreferences,
although a bit of hacking is required to enable it. It supports both
Basic Regular Expressions (BREs) and Extended Regular Expressions
(EREs), without supporting backrefs for EREs, following POSIX strictly
in this respect. And EREs is what we actually use (rightly). This is
contrary to many implementations (including the default on Linux) of
POSIX regexes, that do allow backrefs in EREs.
Adding backref support to our EREs is a very simple change in the
regcomp parsing code. I fail to think of significant cases where it
would clash with existing things, and can bring more versatility to
the regexes we write. There's always the danger of a backref in a
specially crafted regex causing exponential matching times, but since
we mainly use them for testing purposes I don't think it's a big
problem. [it can also be placed behind a flag specific to FileCheck,
if needed].
For more details, see:
* http://lists.cs.uiuc.edu/pipermail/llvmdev/2012-November/055840.html
* http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20121126/156878.html
git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@168802 91177308-0d34-0410-b5e6-96231b3b80d8
2012-11-28 19:00:02 +00:00
|
|
|
/// Compiles the given regular expression \p Regex.
|
2010-07-14 22:38:02 +00:00
|
|
|
Regex(StringRef Regex, unsigned Flags = NoFlags);
|
2014-01-02 19:04:59 +00:00
|
|
|
Regex(const Regex &) LLVM_DELETED_FUNCTION;
|
|
|
|
Regex &operator=(Regex regex) {
|
|
|
|
std::swap(preg, regex.preg);
|
|
|
|
std::swap(error, regex.error);
|
|
|
|
return *this;
|
|
|
|
}
|
|
|
|
Regex(Regex &®ex) {
|
|
|
|
preg = regex.preg;
|
|
|
|
error = regex.error;
|
2014-04-07 04:17:22 +00:00
|
|
|
regex.preg = nullptr;
|
2014-01-02 19:04:59 +00:00
|
|
|
}
|
2009-08-30 08:24:09 +00:00
|
|
|
~Regex();
|
|
|
|
|
|
|
|
/// isValid - returns the error encountered during regex compilation, or
|
|
|
|
/// matching, if any.
|
|
|
|
bool isValid(std::string &Error);
|
|
|
|
|
2009-09-26 21:27:04 +00:00
|
|
|
/// getNumMatches - In a valid regex, return the number of parenthesized
|
|
|
|
/// matches it contains. The number filled in by match will include this
|
|
|
|
/// many entries plus one for the whole regex (as element 0).
|
|
|
|
unsigned getNumMatches() const;
|
2010-08-20 17:38:38 +00:00
|
|
|
|
2012-09-14 14:57:36 +00:00
|
|
|
/// matches - Match the regex against a given \p String.
|
2009-08-30 08:24:09 +00:00
|
|
|
///
|
2011-04-15 05:18:47 +00:00
|
|
|
/// \param Matches - If given, on a successful match this will be filled in
|
2012-09-14 14:57:36 +00:00
|
|
|
/// with references to the matched group expressions (inside \p String),
|
2009-08-30 08:24:09 +00:00
|
|
|
/// the first group is always the entire pattern.
|
2009-09-24 21:47:32 +00:00
|
|
|
///
|
|
|
|
/// This returns true on a successful match.
|
2014-04-07 04:17:22 +00:00
|
|
|
bool match(StringRef String, SmallVectorImpl<StringRef> *Matches = nullptr);
|
2010-02-17 20:08:42 +00:00
|
|
|
|
|
|
|
/// sub - Return the result of replacing the first match of the regex in
|
2012-09-14 14:57:36 +00:00
|
|
|
/// \p String with the \p Repl string. Backreferences like "\0" in the
|
2010-02-17 20:08:42 +00:00
|
|
|
/// replacement string are replaced with the appropriate match substring.
|
|
|
|
///
|
|
|
|
/// Note that the replacement string has backslash escaping performed on
|
|
|
|
/// it. Invalid backreferences are ignored (replaced by empty strings).
|
|
|
|
///
|
|
|
|
/// \param Error If non-null, any errors in the substitution (invalid
|
|
|
|
/// backreferences, trailing backslashes) will be recorded as a non-empty
|
|
|
|
/// string.
|
2014-04-07 04:17:22 +00:00
|
|
|
std::string sub(StringRef Repl, StringRef String,
|
|
|
|
std::string *Error = nullptr);
|
2010-02-17 20:08:42 +00:00
|
|
|
|
2013-08-05 17:47:59 +00:00
|
|
|
/// \brief If this function returns true, ^Str$ is an extended regular
|
|
|
|
/// expression that matches Str and only Str.
|
|
|
|
static bool isLiteralERE(StringRef Str);
|
|
|
|
|
2013-12-12 00:06:41 +00:00
|
|
|
/// \brief Turn String into a regex by escaping its special characters.
|
|
|
|
static std::string escape(StringRef String);
|
|
|
|
|
2009-08-30 08:24:09 +00:00
|
|
|
private:
|
|
|
|
struct llvm_regex *preg;
|
|
|
|
int error;
|
|
|
|
};
|
|
|
|
}
|
2010-08-20 17:38:44 +00:00
|
|
|
|
|
|
|
#endif // LLVM_SUPPORT_REGEX_H
|