diff --git a/phase1.h b/phase1.h new file mode 100644 index 0000000..5511a9e --- /dev/null +++ b/phase1.h @@ -0,0 +1,39 @@ +#ifndef __phase1_h__ +#define __phase1_h__ + + +#include +#include + +class phase1 { + +public: + typedef std::function pipe_function; + + phase1(); + + void process(const unsigned char *begin, const unsigned char *end, bool final = false); + + void process(const char *begin, const char *end, bool final = false) { + process((const unsigned char *)begin, (const unsigned char *)end, final); + } + + void process(const std::string &s) { process(s.data(), s.data() + s.size()); } + + void finish() { const char *tmp = ""; process(tmp, tmp, true); } + + void reset(); + + //template + //phase1 &operator >>= (F &&f) { pipe_to = pipe_function(f); return *this; } + + phase1 &operator >>= (pipe_function f) { pipe_to = f; return *this; } + +private: + std::string scratch; + pipe_function pipe_to; + int line = 1; + int cs = 0; +}; + +#endif diff --git a/phase1.rl b/phase1.rl new file mode 100644 index 0000000..c77958a --- /dev/null +++ b/phase1.rl @@ -0,0 +1,180 @@ +/* + * phase1 -- strip comments and merge multi-lines. + * + */ + + +#include "phase1.h" +#include +#include + +const unsigned char escape = 0xb6; + +/* + * from experimentation, mpw splits on ; after variable expansion; + * this splits before. something stupid like: + * set q '"'; echo {q} ; " + * will not be handled correctly. oh well. + * (should probably just drop that and we can then combine tokenizing w/ + * variable expansion) + */ +%%{ + machine main; + alphtype unsigned char; + + + escape = 0xb6; + ws = [ \t]; + nl = ('\n' | '\r'); + + action add_line { + /* strip trailing ws */ + while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back(); + + if (!scratch.empty()) { + std::string tmp = std::move(scratch); + scratch.clear(); + if (pipe_to) pipe_to(std::move(tmp)); + } + fgoto main; + } + + action push_back { + scratch.push_back(fc); + } + + action push_back_escape { + scratch.push_back(escape); + scratch.push_back(fc); + } + + comment = '#' (any-nl)*; + + escape_seq = + escape + ( + nl ${ /* esc newline */ line++; } + | + (any-nl) $push_back_escape + ) + ; + + + # single-quoted string. only escape \n is special. + # handling is so stupid I'm not going to support it. + + sstring = + ['] $push_back + ( (any-nl-[']) $push_back )* + ['] $push_back + $err{ + throw std::runtime_error("### MPW Shell - 's must occur in pairs."); + } + ; + + # same quoting logic as ' string + vstring = + '{' $push_back + ( (any-nl-'}') $push_back )* + '}' $push_back + $err{ + throw std::runtime_error("### MPW Shell - {s must occur in pairs."); + } + ; + + + # double-quoted string. + # escape \n is ignored. others do nothing. + dstring = + ["] $push_back + ( + escape_seq + | + vstring + | + (any-escape-nl-["{]) $push_back + )* ["] $push_back + $err{ + throw std::runtime_error("### MPW Shell - \"s must occur in pairs."); + } + ; + + # this is a mess ... + coalesce_ws = + ws + ( + ws + | + escape nl ${ line++; } + )* + <: + any ${ scratch.push_back(' '); fhold; } + ; + + line := + ( + sstring + | + dstring + | + vstring + | + escape_seq + | + coalesce_ws + | + (any-escape-nl-ws-[#'"{]) $push_back + )* + comment? + nl ${ line++; } $add_line + ; + + main := + # strip leading whitespace. + ws* + <: # left guard -- higher priority to ws. + any ${ fhold; fgoto line; } + ; + +}%% + + + +namespace { + %% write data; +} + +phase1::phase1() { + %% write init; +} + +void phase1::reset() { + %% write init; + scratch.clear(); + // line = 1? +} + +void phase1::process(const unsigned char *begin, const unsigned char *end, bool final) { + + int start_line; + + const unsigned char *p = begin; + const unsigned char *pe = end; + const unsigned char *eof = nullptr; + + if (final) + eof = pe; + + %% write exec; + + if (cs == main_error) { + throw std::runtime_error("MPW Shell - Lexer error."); + } + +#if 0 + if (cs != main_start && final) { + // will this happen? + throw std::runtime_error("MPW Shell - Lexer error."); + } +#endif +} diff --git a/phase2.h b/phase2.h new file mode 100644 index 0000000..9b69c84 --- /dev/null +++ b/phase2.h @@ -0,0 +1,44 @@ + +#ifndef __phase2_h__ +#define __phase2_h__ + +#include +#include +#include +#include + +typedef std::unique_ptr command_ptr; +typedef std::vector command_ptr_vector; + +class phase2 { + +public: + typedef std::function pipe_function; + + void process(const std::string &line); + void finish(); + + virtual void syntax_error(); + virtual void parse_accept(); + virtual void parse(int, std::string &&); + + phase2 &operator >>=(pipe_function f) { pipe_to = f; return *this; } + +private: + + std::string scratch; + int type = 0; + bool error = false; + bool immediate = false; + + pipe_function pipe_to; + + void flush(); + bool special(); + void classify(); + void exec(); + + command_ptr_vector command_queue; +}; + +#endif diff --git a/phase2.rl b/phase2.rl new file mode 100644 index 0000000..797f98a --- /dev/null +++ b/phase2.rl @@ -0,0 +1,182 @@ +/* + * phase2 -- parse a line into major control structures (begin/end/if/etc) + * input is a full line -- comments have been removed, escape-nl handled, trailing newline stripped. + * + */ + +#include "mpw-shell-grammar.h" +#include "phase2.h" +#include "command.h" + +%%{ + machine main; + alphtype unsigned char; + + action not_special { !special() } + + ws = [ \t]; + + main := |* + '||' when not_special => { + flush(); + parse(PIPE_PIPE, std::string(ts, te)); + }; + + '&&' when not_special => { + flush(); + parse(AMP_AMP, std::string(ts, te)); + }; + + '(' when not_special => { + flush(); + parse(LPAREN, std::string(ts, te)); + }; + + # ) may include redirection so start a new token but don't parse it yet. + ')' when not_special => { + flush(); + scratch.push_back(fc); + type = RPAREN; + }; + + # todo -- also add in strings and escapes. + + ';' => { flush(); parse(SEMI, ";"); }; + ws => { if (!scratch.empty()) scratch.push_back(fc); }; + any => { scratch.push_back(fc); }; + *|; +}%% + + +%%{ + machine classify; + alphtype unsigned char; + + ws = [ \t]; + + IF = /if/i; + ELSE = /else/i; + END = /end/i; + BEGIN = /begin/i; + EVALUATE = /evaluate/i; + + + main := |* + IF %eof{ type = IF; return; }; + IF ws => { type = IF; return; }; + + ELSE %eof{ type = ELSE; return; }; + ELSE ws => { type = ELSE; return; }; + + ELSE ws+ IF %eof{ type = ELSE_IF; return; }; + ELSE ws+ IF ws => { type = ELSE_IF; return; }; + + EVALUATE %eof{ type = EVALUATE; return; }; + EVALUATE ws => { type = EVALUATE; return; }; + + END %eof{ type = END; return; }; + END ws => { type = END; return; }; + + BEGIN %eof{ type = BEGIN; return; }; + BEGIN ws => { type = BEGIN; return; }; + + ')' => { type = LPAREN; return; }; + *|; + +}%% + + +namespace { + %% machine classify; + %% write data; + %% machine main; + %% write data; +} + +void phase2::flush() { + // remove white space... + while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back(); + + + if (!scratch.empty()) { + if (!type) classify(); + parse(type, std::move(scratch)); + } + + type = 0; + scratch.clear(); +} + +bool phase2::special() { + if (!type) classify(); + + switch (type) { + case IF: + case ELSE: + case ELSE_IF: + case EVALUATE: + return true; + default: + return false; + } +} + +void phase2::classify() { + if (type) return; + if (scratch.empty()) return; + + int cs; + int act; + const unsigned char *p = (const unsigned char *)scratch.data(); + const unsigned char *pe = p + scratch.size(); + const unsigned char *eof = pe; + const unsigned char *te, *ts; + + type = COMMAND; + + %% machine classify; + %% write init; + %% write exec; +} + +void phase2::process(const std::string &line) { + + + int cs; + int act; + const unsigned char *p = (const unsigned char *)line.data(); + const unsigned char *pe = p + line.size(); + const unsigned char *eof = pe; + const unsigned char *te, *ts; + + scratch.clear(); + type = 0; + + %% machine main; + %% write init; + %% write exec; + + flush(); + parse(NL, ""); + + exec(); +} + +void phase2::finish() { + parse(0, ""); + exec(); +} + +void phase2::exec() { + + if (pipe_to) { + for (auto &p : command_queue) { + if (p) { + pipe_to(std::move(p)); + } + } + command_queue.clear(); + } + +} +