mirror of
https://github.com/ksherlock/mpw-shell.git
synced 2024-06-14 04:29:32 +00:00
270 lines
5.2 KiB
C++
270 lines
5.2 KiB
C++
|
|
||
|
#include "mpw-regex.h"
|
||
|
#include "environment.h"
|
||
|
|
||
|
// Shorthand for the read-only string iterator that the convert_re* parsing
// helpers below take as their cursor/end arguments and return as the new cursor.
typedef std::string::const_iterator iterator;
|
||
|
|
||
|
namespace {

	/*
	 * Returns true when c is a metacharacter of the ECMAScript regex grammar
	 * (the default std::regex flavor), i.e. a character that has to be
	 * preceded by a backslash in order to be matched literally.
	 *
	 * '?' is part of the set: an escaped '?' in the source pattern must come
	 * out as "\?" rather than as a bare optional-quantifier.
	 */
	bool ecma_special(unsigned char c) {
		switch(c) {
			case '|':
			case '{':
			case '}':
			case '(':
			case ')':
			case '[':
			case ']':
			case '*':
			case '+':
			case '?':
			case '^':
			case '$':
			case '.':
			case '\\':
				return true;
			default:
				return false;
		}
	}
}
|
||
|
|
||
|
/*
 * Builds the matcher from an MPW-flavor pattern by handing it straight to
 * convert_re(). When slash is true, convert_re() requires the pattern to
 * start with a '/' delimiter and to end at the matching closing '/'.
 * Any std::regex_error raised during conversion propagates to the caller.
 */
mpw_regex::mpw_regex(const std::string &s, bool slash)
{
	this->convert_re(s, slash);
}
|
||
|
|
||
|
/*
 * Reports whether s contains at least one unescaped pattern metacharacter:
 * '[', '?', '*', '+', or one of the bytes 0xc7 / 0xc5.
 * The byte 0xb6 acts as an escape: the byte following it is never examined.
 */
bool mpw_regex::is_glob(const std::string &s) {
	for (auto pos = s.begin(), last = s.end(); pos != last; ++pos) {
		const unsigned char ch = *pos;

		if (ch == 0xb6) {
			// Step over the escaped byte; a trailing escape just ends the scan.
			if (++pos == last) break;
			continue;
		}

		const bool meta = ch == '[' || ch == '?' || ch == '*' || ch == '+'
			|| ch == 0xc7 || ch == 0xc5;
		if (meta) return true;
	}
	return false;
}
|
||
|
|
||
|
bool mpw_regex::match(const std::string &s, Environment &e) {
|
||
|
std::smatch m;
|
||
|
bool ok = std::regex_match(s, m, re);
|
||
|
if (!ok) return false;
|
||
|
|
||
|
for (int i = 0; i < 10; ++i) {
|
||
|
int index = capture_map[i];
|
||
|
|
||
|
if (index && index < m.size() && m[index].matched) {
|
||
|
std::string v(m[index].first, m[index].second);
|
||
|
std::string k("\xa8");
|
||
|
k += (i + '0');
|
||
|
e.set(k, std::move(v));
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
bool mpw_regex::match(const std::string &s) {
|
||
|
return std::regex_match(s, re);
|
||
|
}
|
||
|
|
||
|
|
||
|
// convert a mpw-flavor regex to std::regex flavor regex.
|
||
|
void mpw_regex::convert_re(const std::string &s, bool slash) {
|
||
|
|
||
|
|
||
|
std::string accumulator;
|
||
|
auto iter = s.begin();
|
||
|
auto end = s.end();
|
||
|
|
||
|
if (slash) {
|
||
|
if (iter == end || *iter++ != '/')
|
||
|
throw std::regex_error(std::regex_constants::error_space);
|
||
|
}
|
||
|
|
||
|
iter = convert_re(iter, end, accumulator, slash ? '/' : 0);
|
||
|
|
||
|
if (iter != end) throw std::regex_error(std::regex_constants::error_space);
|
||
|
|
||
|
|
||
|
re = std::regex(accumulator);
|
||
|
if (slash) key = s;
|
||
|
else key = "/" + s + "/";
|
||
|
}
|
||
|
|
||
|
|
||
|
/*
 * Translates pattern text from [iter, end) and appends the std::regex
 * equivalent to accumulator.
 *
 * term - when non-zero, translation stops at the first unescaped occurrence
 *        of this byte and the iterator just past it is returned; reaching
 *        end without seeing it throws std::regex_error (error_paren).
 *        When zero, the whole range is consumed.
 *
 * Byte 0xb6 escapes the following byte; a dangling escape throws
 * std::regex_error (error_escape).
 */
iterator mpw_regex::convert_re(iterator iter, iterator end, std::string &accumulator, unsigned char term) {

	while (iter != end) {
		const unsigned char ch = *iter++;

		if (ch == 0xb6) {
			// Escape: take the next byte literally, quoting it if std::regex
			// would otherwise treat it as an operator.
			if (iter == end) throw std::regex_error(std::regex_constants::error_escape);
			const unsigned char literal = *iter++;
			if (ecma_special(literal)) accumulator += '\\';
			accumulator += literal;
			continue;
		}

		// The terminator is checked before any operator handling.
		if (term != 0 && ch == term) return iter;

		switch (ch) {
			case '?':
				// any single character
				accumulator += '.';
				break;

			case 0xc5:
				// any string
				accumulator += ".*";
				break;

			case '[':
				// character set
				iter = convert_re_set(iter, end, accumulator);
				break;

			case '(':
				// group, possibly tagged
				iter = convert_re_capture(iter, end, accumulator);
				break;

			case 0xc7:
				// repeat count
				iter = convert_re_repeat(iter, end, accumulator);
				break;

			case '+':
			case '*':
				// same meaning in both flavors
				accumulator += ch;
				break;

			default:
				// ordinary character; quote it if std::regex gives it a meaning
				if (ecma_special(ch)) accumulator += '\\';
				accumulator += ch;
				break;
		}
	}

	if (term) throw std::regex_error(std::regex_constants::error_paren);
	return iter;
}
|
||
|
|
||
|
/*
 * Translates a repeat count. Called with iter positioned just after the
 * opening 0xc7 byte; copies digits and commas into a "{...}" quantifier and
 * stops at the closing 0xc8 byte.
 *
 * Returns the iterator just past the closing byte. Throws std::regex_error
 * (error_brace) on any other character or when the input ends first.
 * The count itself is not validated here; std::regex rejects a malformed
 * quantifier when the converted expression is compiled.
 */
iterator mpw_regex::convert_re_repeat(iterator iter, iterator end, std::string &accumulator) {

	accumulator += "{";

	while (iter != end) {
		unsigned char c = *iter++;
		if (c == 0xc8) {
			accumulator += "}";
			return iter;
		}
		if (c != ',' && !isdigit(c)) break;
		accumulator += c;
	}

	throw std::regex_error(std::regex_constants::error_brace);
}
|
||
|
|
||
|
/*
 * Translates a character set. Called with iter positioned just after the
 * opening '['; appends the equivalent std::regex bracket expression to
 * accumulator and returns the iterator just past the closing ']'.
 *
 * - A leading 0xc2 byte negates the set (emitted as '^').
 * - A leading '^' is a literal and is emitted escaped.
 * - Byte 0xb6 escapes the following byte.
 * - '\' and '[' are emitted escaped. An unescaped '[' inside a bracket
 *   expression would let std::regex interpret "[:name:]", "[.x.]" and
 *   "[=x=]" as class syntax, which this pattern flavor does not have.
 *
 * Throws std::regex_error: error_escape for a dangling escape, error_brack
 * when the input ends before the closing ']'.
 */
iterator mpw_regex::convert_re_set(iterator iter, iterator end, std::string &accumulator) {

	accumulator += "[";

	if (iter != end && static_cast<unsigned char>(*iter) == 0xc2) {
		accumulator += "^";
		++iter;
	} else if (iter != end && *iter == '^') {
		// leading ^ needs to be escaped.
		accumulator += "\\^";
		++iter;
	}

	while (iter != end) {
		unsigned char c = *iter++;

		if (c == 0xb6) {
			// escape
			// NOTE(review): the escaped byte is always emitted as "\<byte>",
			// so an escaped letter such as 'd' or 'w' reaches std::regex as a
			// class escape. Confirm whether that is intended.
			if (iter == end) throw std::regex_error(std::regex_constants::error_escape);
			c = *iter++;
			accumulator += '\\';
			accumulator += c;
			continue;
		}

		if (c == ']') {
			accumulator += "]";
			return iter;
		}

		if (c == '\\' || c == '[') {
			// Both are literals here but operators inside a std::regex set.
			accumulator += '\\';
			accumulator += c;
			continue;
		}

		accumulator += c;
	}

	throw std::regex_error(std::regex_constants::error_brack);
}
|
||
|
|
||
|
/*
 * Translates a parenthesized group. Called with iter positioned just after
 * the opening '('; appends the converted group to accumulator and returns
 * the iterator just past the group (and past its tag, if any).
 *
 * A group is only captured when its closing ')' is followed by the tag
 * byte 0xa8 and a digit; the digit n selects the slot in capture_map that
 * records the std::regex group number. Untagged groups are emitted as
 * non-capturing "(?:...)".
 *
 * consider: (abc(abc)<tag>1(xyz))<tag>2
 *   group 1 = (abcabcxyz)
 *   group 2 = (abc)
 * BUT we don't know whether a group is captured until its tag is parsed,
 * so the group body is converted into a scratch buffer first and group
 * numbers are corrected afterwards.
 *
 * Throws std::regex_error (error_badbrace) when the tag byte is not followed
 * by a digit; errors from the nested convert_re() call propagate.
 */
iterator mpw_regex::convert_re_capture(iterator iter, iterator end, std::string &accumulator) {

	std::string scratch;
	bool capture = false;
	int n = -1;

	// Provisionally claim the next std::regex group number for this group.
	const int ecma_index = ++num_captures;

	// A leading '?' needs no special handling: convert_re() turns it into '.'
	// like any other wildcard, so the output can never begin with the "(?"
	// extension syntax by accident.
	iter = convert_re(iter, end, scratch, ')');

	// check for capture?
	if (iter != end && static_cast<unsigned char>(*iter) == 0xa8) {
		++iter;
		// isdigit() is undefined for negative char values, and high-bit bytes
		// are routine in this syntax, so go through unsigned char.
		if (iter == end || !isdigit(static_cast<unsigned char>(*iter)))
			throw std::regex_error(std::regex_constants::error_badbrace); // eh
		n = *iter++ - '0';
		capture = true;
	}

	accumulator += '(';
	if (capture) {
		capture_map[n] = ecma_index;
	} else {
		accumulator += "?:";
		// This group does not consume a group number after all: give the
		// number back and shift every nested capture down by one.
		--num_captures;
		for (int &index : capture_map) {
			if (index >= ecma_index) --index;
		}
	}
	accumulator += scratch;
	accumulator += ')';
	return iter;
}
|