/*
 * Mirror of https://github.com/ksherlock/mpw-shell.git
 * (synced 2025-01-08 19:31:31 +00:00).
 * Ragel source, 272 lines, 5.2 KiB.
 */
#include <string>
|
|
#include <vector>
|
|
#include <stdio.h>
|
|
|
|
#include "mpw-shell.h"
|
|
#include "error.h"
|
|
|
|
%%{
	# Scanner specification that splits a command line into tokens.
	#
	# The host function that instantiates this machine must provide:
	#   scratch - a std::string accumulating the text of the current word
	#   tokens  - the output container (emplace_back is called on it)
	#   eval    - a bool; when true the expression operators are recognized
	# plus the usual Ragel scanner variables (p, pe, eof, cs, act, ts, te).
	machine tokenizer;
	alphtype unsigned char;

	# Byte that introduces an escape sequence (escape + any one byte).
	escape = 0xb6;
	ws = [ \t\n\r];

	# Flush the pending word, if there is one, as a token of its own.
	action push_token {
		if (!scratch.empty()) {
			tokens.emplace_back(std::move(scratch));
			scratch.clear();
		}
	}

	# Append the current byte to the pending word.
	action push {
		scratch.push_back(fc);
	}

	# Append the whole matched span [ts, te) to the pending word.
	# Quotes and escape bytes are kept verbatim here.
	action push_string {
		scratch.append(ts, te);
	}

	# single-quoted string; a scan error inside it throws sstring_error.
	schar = [^'] ;
	sstring = ['] schar** ['] $err{ throw sstring_error(); } ;

	escape_seq = escape any ;

	# double-quoted string; a scan error inside it throws dstring_error.
	dchar = escape_seq | (any - escape - ["]);
	dstring = ["] dchar** ["] $err{ throw dstring_error(); } ;

	# Condition used by the "when eval" guards below.
	action eval { eval }

	# > == start state (single char tokens or common prefix)
	# % == final state (multi char tokens w/ unique prefix)
	# $ == all states
	char = any - ['"];
	main := |*
		# whitespace terminates the pending word.
		ws+ >push_token;

		# redirection operators (recognized in every mode).
		'>>' %push_token => { tokens.emplace_back(">>", '>>'); };
		'>' %push_token => { tokens.emplace_back(">", '>'); };

		'<' %push_token => { tokens.emplace_back("<", '<'); };

		# macroman ∑, ∑∑
		0xb7 0xb7 %push_token => { tokens.emplace_back("\xb7\xb7", 0xb7b7); };
		0xb7 %push_token => { tokens.emplace_back("\xb7", 0xb7); };

		# macroman ≥, ≥≥
		0xb3 0xb3 %push_token => { tokens.emplace_back("\xb3\xb3", 0xb3b3); };
		0xb3 %push_token => { tokens.emplace_back("\xb3", 0xb3); };

		# eval-only.

		'||' when eval
			%push_token => { tokens.emplace_back("||", '||'); };
		'|' when eval
			%push_token => { tokens.emplace_back("|", '|'); };

		'&&' when eval
			%push_token => { tokens.emplace_back("&&", '&&'); };

		'(' when eval
			%push_token => { tokens.emplace_back("(", '('); };

		')' when eval
			%push_token => { tokens.emplace_back(")", ')'); };

		'<<' when eval
			%push_token => { tokens.emplace_back("<<", '<<'); };

		'<=' when eval
			%push_token => { tokens.emplace_back("<=", '<='); };

		'>=' when eval
			%push_token => { tokens.emplace_back(">=", '>='); };

		'==' when eval
			%push_token => { tokens.emplace_back("==", '=='); };

		'!=' when eval
			%push_token => { tokens.emplace_back("!=", '!='); };

		'&' when eval
			%push_token => { tokens.emplace_back("&", '&'); };

		# NOTE(review): this pattern flushes with > (entering) while its
		# siblings use % (leaving); kept as found, confirm it is intentional.
		'+' when eval
			>push_token => { tokens.emplace_back("+", '+'); };

		'*' when eval
			%push_token => { tokens.emplace_back("*", '*'); };

		'%' when eval
			%push_token => { tokens.emplace_back("%", '%'); };

		# The token text must be the minus sign itself: the host rebuilds
		# the command line from the token strings, so a wrong string here
		# would silently turn a subtraction into an addition.
		'-' when eval
			%push_token => { tokens.emplace_back("-", '-'); };

		'!' when eval
			%push_token => { tokens.emplace_back("!", '!'); };

		'^' when eval
			%push_token => { tokens.emplace_back("^", '^'); };

		'~' when eval
			%push_token => { tokens.emplace_back("~", '~'); };

		'=' when eval
			%push_token => { tokens.emplace_back("=", '='); };

		'+=' when eval
			%push_token => { tokens.emplace_back("+=", '+='); };

		'-=' when eval
			%push_token => { tokens.emplace_back("-=", '-='); };

		# quoted strings and escape sequences are copied into the pending
		# word unchanged.
		sstring => push_string;
		dstring => push_string;
		escape_seq => push_string;

		# any other byte extends the pending word.
		char => push;
	*|
	;
}%%
|
|
|
|
|
|
|
|
// Re-type a text token that spells one of the word operators.
//
// The token text is run through a small keyword machine. When the whole
// text is (case-insensitively) and / or / not / div / mod, t.type is set
// to the corresponding operator code. Any other text leaves t untouched.
// t.string itself is never modified.
void replace_eval_token(token &t) {

	// Ragel host variables: the input span is the token text, and eof == pe
	// so that end-of-input actions run once the text is consumed.
	const char *p = t.string.data();
	const char *pe = p + t.string.size();
	const char *eof = pe;
	int cs;

	%%{
		machine eval_keywords;

		action as_and { t.type = '&&'; }
		action as_or  { t.type = '||'; }
		action as_not { t.type = '!'; }
		action as_div { t.type = '/'; }
		action as_mod { t.type = '%'; }

		# Each action is a leaving action, so it runs only after the
		# complete keyword has been matched.
		main :=
			  'and'i %as_and
			| 'or'i  %as_or
			| 'not'i %as_not
			| 'div'i %as_div
			| 'mod'i %as_mod
			;

		write data;
		write init;
		write exec;
	}%%
}
|
|
|
|
|
|
// Strip quoting from a token in place.
//
// Single-quoted spans are copied without their quotes, double-quoted spans
// are copied without their quotes and with escape sequences decoded, and
// bare escape sequences are decoded as well. Decoding maps f, n and t to
// form feed, newline and tab; any other escaped byte is copied through.
// A token containing no quote or escape byte is returned unchanged.
void unquote(token &t) {

	// Fast path: nothing to strip or decode.
	const auto first_special = t.string.find_first_of("'\"\xb6", 0, 3);
	if (first_special == t.string.npos) return;

	// Ragel host variables over the token text (eof == pe: single buffer).
	const unsigned char *p = reinterpret_cast<const unsigned char *>(t.string.data());
	const unsigned char *pe = p + t.string.length();
	const unsigned char *eof = pe;
	int cs;

	// The result is built separately; it is never longer than the input.
	std::string scratch;
	scratch.reserve(t.string.length());

	%%{

		machine unquote;
		alphtype unsigned char;

		# Copy the current byte to the output.
		action push { scratch.push_back(fc); }

		escape = 0xb6;
		char = any - escape - ['"];

		# single-quoted: every byte between the quotes is literal.
		schar = [^'] $push;
		sstring = ['] schar** ['];

		# The byte following an escape.
		ecode =
			  'f' ${ scratch.push_back('\f'); }
			| 'n' ${ scratch.push_back('\n'); }
			| 't' ${ scratch.push_back('\t'); }
			| [^fnt] ${ scratch.push_back(fc); }
			;

		# NOTE(review): the error action presumably covers an escape with
		# nothing after it. The name escape inside the host code is a C++
		# identifier, not the Ragel definition above; confirm that it is
		# declared somewhere visible to this file.
		escape_seq = escape $err{ scratch.push_back(escape); } ecode;

		# double-quoted: escapes are decoded, everything else is literal.
		dchar = escape ecode | (any - escape - ["]) $push;
		dstring = ["] dchar** ["];

		main := (
			  escape_seq
			| sstring
			| dstring
			| char $push
		)**;

		write data;
		write init;
		write exec;
	}%%

	t.string = std::move(scratch);
}
|
|
|
|
|
|
// Split a command line into tokens.
//
// s    - the command line. On return it has been rewritten as the raw
//        token strings (quotes and escapes still present) joined by single
//        spaces.
// eval - when true, the expression operators of the tokenizer machine are
//        recognized, and text tokens spelling a word operator are re-typed
//        by replace_eval_token().
//
// Returns the tokens in input order. Text tokens have been passed through
// unquote(). The tokenizer machine's error actions throw sstring_error /
// dstring_error when a quoted string fails to scan.
std::vector<token> tokenize(std::string &s, bool eval)
{
	std::vector<token> tokens;
	std::string scratch;	// text of the word currently being collected

	%%machine tokenizer;
	%% write data;

	// Ragel scanner state; the names are fixed by the generated code.
	int cs;
	int act;
	const unsigned char *p = (const unsigned char *)s.data();
	const unsigned char *pe = p + s.size();
	const unsigned char *eof = pe;
	const unsigned char *ts;
	const unsigned char *te;

	%%write init;

	%%write exec;

	// A word still pending at end of input becomes the last token.
	if (!scratch.empty()) {
		tokens.emplace_back(std::move(scratch));
		scratch.clear();
	}

	// re-build s: token strings separated by one space, no trailing space.
	s.clear();
	bool need_separator = false;
	for (const token &t : tokens) {
		if (need_separator) s.push_back(' ');
		s.append(t.string);
		need_separator = true;
	}

	// Remove quoting from plain text tokens (after s was rebuilt, so s
	// keeps the quoted spelling).
	for (token &t : tokens) {
		if (t.type != token::text) continue;
		unquote(t);
	}

	// alternate operator tokens for eval
	if (!eval) return tokens;

	for (token &t : tokens) {
		if (t.type != token::text) continue;
		replace_eval_token(t);
	}

	return tokens;
}
|