mpw-shell/phase2.rl

282 lines
4.5 KiB
Plaintext
Raw Normal View History

2016-01-30 03:23:14 +00:00
/*
* phase2 -- parse a line into major control structures (begin/end/if/etc)
* input is a full line -- comments have been removed, escape-nl handled, trailing newline stripped.
*
*/
#include "phase2.h"
#include "phase3.h"
2016-01-30 03:23:14 +00:00
%%{
machine main;
alphtype unsigned char;
# Tokenizer for one input line.  Every character is appended to scratch by the
# default action attached at the bottom of main; the actions below flush scratch
# and hand separator tokens to parse().  An action that ends in fgoto main
# restarts the machine and thereby skips the default push for the current character.

# True when the text collected so far does not classify as a special command.
# NOTE(review): not referenced by any expression in this machine as shown.
action not_special { !special() }
action parse_ws {
// White space before any command text has been collected is dropped.
if (scratch.empty()) fgoto main;
}
action parse_semi {
// ';' ends the current command: emit it, then emit the separator itself.
flush();
parse(SEMI, ";");
fgoto main;
}
2016-01-31 05:41:02 +00:00
action parse_amp_amp {
// Runs on the second '&'.  For special commands the text is left untouched.
if (!special()) {
// The first '&' is already in scratch; remove it before flushing.
scratch.pop_back();
flush();
parse(AMP_AMP, "&&");
fgoto main;
2016-01-31 05:41:02 +00:00
}
}
action parse_pipe_pipe {
// Runs on the second '|'.  Same pattern as parse_amp_amp.
if (!special()) {
scratch.pop_back();
flush();
parse(PIPE_PIPE, "||");
fgoto main;
2016-01-31 05:41:02 +00:00
}
}
2016-01-31 05:41:02 +00:00
2016-07-26 18:20:11 +00:00
action parse_pipe_any {
// Runs on the character that follows a single '|'.
if (!special()) {
// Drop the '|' that was pushed on the previous character.
scratch.pop_back();
flush();
parse(PIPE, "|");
}
// In both cases the current character is re-scanned from the start of main.
fhold;
fgoto main;
}
action parse_pipe_eof {
// Runs when the input ends after a single '|'.
if (!special()) {
scratch.pop_back();
flush();
parse(PIPE, "|");
}
}
action parse_lparen {
// A '(' seen before any command text is a token of its own.
if (scratch.empty()) {
parse(LPAREN, "(");
fgoto main;
2016-01-31 05:41:02 +00:00
}
// Otherwise it stays in the command text; count it so that the matching ')' stays too.
pcount++;
}
2016-01-31 05:41:02 +00:00
action parse_rparen {
// With no open '(' inside the command, ')' ends the command and is a token of its own.
if (pcount <= 0) {
flush();
parse(RPAREN, ")");
fgoto main;
}
// Closes a '(' counted by parse_lparen; the ')' stays in the command text.
--pcount;
}
2016-01-31 05:41:02 +00:00
# Escape byte; escape_seq is the escape byte plus the one character that follows it.
# NOTE(review): 0xb6 is presumably the MPW escape character in the Mac character set -- confirm.
escape = 0xb6;
ws = [ \t];
2016-01-31 05:41:02 +00:00
escape_seq = escape any ;
2016-01-30 03:23:14 +00:00
# single-quoted string: everything up to the next single quote, no escapes.
schar = [^'];
sstring = ['] schar** ['] ;
2016-01-30 03:23:14 +00:00
# braced text: everything up to the next closing brace.
vchar = [^}];
vstring = [{] vchar** [}] ;
2016-01-31 05:41:02 +00:00
# double-quoted string.
dchar = escape_seq | (any - escape - ["]) ;
dstring = ["] dchar** ["];
# backquoted text, delimited by either one or two backquotes; escapes are honored.
echar = escape_seq | (any - escape - [`]) ;
estring1 = '`' echar** '`';
estring2 = '``' echar** '``';
estring = estring1 | estring2 ;
# default action is to push character into scratch.
# fgoto main inhibits.
# The quoted and escaped alternatives below carry no actions of their own; the
# intent appears to be that separators inside them are kept as plain text.
main := (
ws $parse_ws
| ';' $parse_semi
| '(' $parse_lparen
| ')' $parse_rparen
2016-07-26 18:20:11 +00:00
| '|' <eof(parse_pipe_eof)
| '|' [^|] $parse_pipe_any
| '|' '|' $parse_pipe_pipe
| '&' '&' $parse_amp_amp
| escape_seq
| sstring
| dstring
| vstring
| estring
| any
)** ${ scratch.push_back(fc); };
2016-01-31 05:41:02 +00:00
}%%
2016-01-30 03:23:14 +00:00
%%{
machine argv0;
alphtype unsigned char;
# Scans the start of a command and collects its leading word, lower-cased, into
# the host variable argv0.  The host function that executes this machine must
# declare argv0 and must return int, because one action below returns COMMAND.
2016-01-30 03:23:14 +00:00
action push { argv0.push_back(tolower(fc)); }
2016-07-28 20:30:17 +00:00
action break { fbreak; }
2016-01-31 05:41:02 +00:00
escape = 0xb6;
ws = [ \t];
2016-01-31 05:41:02 +00:00
# ` and { not supported here.
2016-01-31 05:41:02 +00:00
2016-01-30 03:23:14 +00:00
# hmmm ... only push the converted char - escape n = \n, for example.
# NOTE(review): esc_seq, sstring and dstring are defined here but main, as
# shown below, does not reference them.
esc_seq =
escape (
'f' ${argv0.push_back('\f'); } |
'n' ${argv0.push_back('\n'); } |
't' ${argv0.push_back('\t'); } |
[^fnt] $push
);
2016-01-30 03:23:14 +00:00
schar = [^'] $push;
sstring = ['] schar** ['];
2016-01-30 03:23:14 +00:00
dchar = esc_seq | (any-escape-["]) $push;
dstring = ["] dchar** ["];
2016-01-30 03:23:14 +00:00
2016-07-28 20:30:17 +00:00
# mpw doesn't handle quotes at this point,
# so simplify and stop if we see anything invalid.
# White space, | < > and the bytes 0xb7 / 0xb3 end the scan; any other
# non-letter makes the enclosing host function return COMMAND.
# NOTE(review): 0xb7 and 0xb3 are presumably MPW redirection operators -- confirm.
# NOTE(review): the alternatives overlap (a stop character also matches
# [^a-zA-Z] and any); the result relies on Ragel's ordering of the embedded
# actions -- confirm against the generated code.
main := (
2016-07-28 20:30:17 +00:00
ws $break
| [|<>] $break
| 0xb7 $break
| 0xb3 $break
| [^a-zA-Z] ${ return COMMAND; }
| any $push
)**;
2016-01-30 03:23:14 +00:00
}%%
2016-01-30 03:23:14 +00:00
// Determine the token type of the command text currently held in scratch.
//
// Returns the cached value of type when it is already non-zero.  Otherwise the
// argv0 machine is run over scratch to collect the lower-cased leading word,
// which is compared against the keyword list below; the matching keyword token
// (or COMMAND when nothing matches) is stored in type and returned.
int phase2::classify() {
2016-01-30 03:23:14 +00:00
// Emit the argv0 machine's tables inside this function.
%%machine argv0;
%%write data;
// A non-zero type means this command has already been classified.
if (type) return type;
// Filled in by the push actions of the argv0 machine.
std::string argv0;
2016-01-30 03:23:14 +00:00
// p, pe and cs are the variables the generated scanner code reads and writes.
const unsigned char *p = (const unsigned char *)scratch.data();
const unsigned char *pe = p + scratch.size();
int cs;
2016-01-30 03:23:14 +00:00
// Default result; an action inside the machine may also return COMMAND directly.
type = COMMAND;
%%write init;
%%write exec;
2016-06-16 04:04:29 +00:00
// fprintf(stderr, "%s -> %s\n", scratch.c_str(), argv0.c_str());
// _(word, token): when argv0 equals word, cache token in type and return it.
#undef _
#define _(a,b) if (argv0 == a) { type = b; return type; }
// expand aliases?
_("begin", BEGIN)
_("break", BREAK)
_("continue", CONTINUE)
_("else", ELSE)
_("end", END)
_("evaluate", EVALUATE)
_("exit", EXIT)
_("for", FOR)
_("if", IF)
_("loop", LOOP)
#undef _
// No keyword matched: type still holds COMMAND.
return type;
}
2016-01-30 03:23:14 +00:00
// File-local Ragel data for both machines.  The main machine's data must be
// emitted before phase2::parse(std::string &&), which executes that machine.
// classify() additionally emits the argv0 data inside its own body.
namespace {
%% machine argv0;
2016-01-30 03:23:14 +00:00
%% write data;
2016-01-30 03:23:14 +00:00
%% machine main;
%% write data;
}
void phase2::flush() {
//fprintf(stderr, "flush: %s\n", scratch.c_str());
2016-01-30 03:23:14 +00:00
// remove white space...
while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back();
if (!scratch.empty()) parse(classify(), std::move(scratch));
2016-01-30 03:23:14 +00:00
type = 0;
pcount = 0;
2016-01-30 03:23:14 +00:00
scratch.clear();
}
2016-01-31 05:41:02 +00:00
/* slightly wrong since whitespace is needed for it to be special. */
2016-01-30 03:23:14 +00:00
bool phase2::special() {
switch (classify()) {
2016-01-30 03:23:14 +00:00
case IF:
case ELSE:
case ELSE_IF:
case EVALUATE:
2016-06-17 01:55:39 +00:00
case BREAK:
case CONTINUE:
case EXIT:
2016-01-30 03:23:14 +00:00
return true;
default:
return false;
}
}
// Forward one token (its type and its text) to the _then callback.
// Tokens are silently dropped when no callback is installed.
void phase2::parse(int type, std::string &&s) {
    if (!_then) return;
    _then(type, std::move(s));
}
// Tokenize one complete input line.
//
// Resets the per-command state, runs the main machine over line (its actions
// emit tokens as separators are encountered), flushes whatever command text
// remains, and finally signals the end of the line to the _then callback.
void phase2::parse(std::string &&line) {
2016-01-30 03:23:14 +00:00
//fprintf(stderr, "-> %s\n", line.c_str());
2016-01-30 03:23:14 +00:00
// cs, p, pe and eof are the variables the generated scanner code reads and writes.
int cs;
const unsigned char *p = (const unsigned char *)line.data();
const unsigned char *pe = p + line.size();
// The whole line is available at once, so end of buffer is also end of input.
const unsigned char *eof = pe;
scratch.clear();
type = 0;
pcount = 0; // parenthesis balancing within command only.
2016-01-30 03:23:14 +00:00
%% machine main;
%% write init;
%% write exec;
// Emit the last command on the line, if any.
flush();
if (_then) {
// NOTE(review): NL is sent twice; presumably the second one lets the
// downstream parser finish reducing the line -- confirm.
_then(NL, "");
_then(NL, "");
2016-01-30 03:23:14 +00:00
}
2016-01-30 17:44:42 +00:00
}
// End-of-input hook.  Currently a no-op: this phase keeps no state that needs
// to be drained here.
void phase2::finish() {
2016-01-30 17:44:42 +00:00
}
void phase2::reset() {
2016-02-02 01:38:29 +00:00
type = 0;
pcount = 0;
scratch.clear();
2016-01-30 17:44:42 +00:00
}
2016-02-05 17:42:22 +00:00