mirror of
https://github.com/ksherlock/mpw-shell.git
synced 2024-12-28 09:29:57 +00:00
pipe-lined lexing/parsing.
This commit is contained in:
parent
4297c7095e
commit
2157dc0ba8
39
phase1.h
Normal file
39
phase1.h
Normal file
@ -0,0 +1,39 @@
|
||||
#ifndef __phase1_h__
|
||||
#define __phase1_h__
|
||||
|
||||
|
||||
#include <string>
|
||||
#include <functional>
|
||||
|
||||
/*
 * phase1 -- first stage of the lexing pipeline.
 *
 * Raw bytes are fed in through process(); the state machine (see phase1.rl)
 * accumulates them in `scratch` and hands every completed, non-empty line to
 * the callback installed with operator>>=.
 */
class phase1 {

public:
	/* callback that receives each completed line as an rvalue string. */
	typedef std::function<void(std::string &&)> pipe_function;

	phase1();

	/*
	 * Feed the bytes in [begin, end) to the state machine.
	 * final = true marks the buffer as the end of all input.
	 */
	void process(const unsigned char *begin, const unsigned char *end, bool final = false);

	/* convenience overload: same as above for plain char buffers. */
	void process(const char *begin, const char *end, bool final = false) {
		process(reinterpret_cast<const unsigned char *>(begin),
			reinterpret_cast<const unsigned char *>(end),
			final);
	}

	/* convenience overload: feed a whole string (not marked as final). */
	void process(const std::string &s) { process(s.data(), s.data() + s.size()); }

	/* signal end of input by processing an empty, final buffer. */
	void finish() { const char *tmp = ""; process(tmp, tmp, true); }

	void reset();

	//template<class F>
	//phase1 &operator >>= (F &&f) { pipe_to = pipe_function(f); return *this; }

	/*
	 * Install the callback that receives completed lines.
	 * f is already our own copy (taken by value), so it is swapped into place
	 * rather than copied a second time.
	 */
	phase1 &operator >>= (pipe_function f) { pipe_to.swap(f); return *this; }

private:
	std::string scratch;    /* line currently being assembled */
	pipe_function pipe_to;  /* receiver for completed lines (may be empty) */
	int line = 1;           /* incremented on every newline the machine consumes */
	int cs = 0;             /* current ragel state */
};
|
||||
|
||||
#endif
|
180
phase1.rl
Normal file
180
phase1.rl
Normal file
@ -0,0 +1,180 @@
|
||||
/*
|
||||
* phase1 -- strip comments and merge multi-lines.
|
||||
*
|
||||
*/
|
||||
|
||||
|
||||
#include "phase1.h"
|
||||
#include <stdexcept>
|
||||
#include <stdint.h>
|
||||
|
||||
const unsigned char escape = 0xb6;
|
||||
|
||||
/*
|
||||
* from experimentation, mpw splits on ; after variable expansion;
|
||||
* this splits before. something stupid like:
|
||||
* set q '"'; echo {q} ; "
|
||||
* will not be handled correctly. oh well.
|
||||
* (should probably just drop that and we can then combine tokenizing w/
|
||||
* variable expansion)
|
||||
*/
|
||||
%%{
|
||||
machine main;
|
||||
alphtype unsigned char;
|
||||
|
||||
|
||||
escape = 0xb6;
|
||||
ws = [ \t];
|
||||
nl = ('\n' | '\r');
|
||||
|
||||
action add_line {
	/*
	 * strip trailing ws.
	 * scratch holds raw bytes (including the 0xb6 escape character), which
	 * are negative where char is signed. isspace() is only defined for
	 * values representable as unsigned char (or EOF), so convert first.
	 */
	while (!scratch.empty() && isspace(static_cast<unsigned char>(scratch.back())))
		scratch.pop_back();

	if (!scratch.empty()) {
		/* take the line out of scratch before invoking the callback. */
		std::string tmp = std::move(scratch);
		scratch.clear();
		if (pipe_to) pipe_to(std::move(tmp));
	}
	/* restart at main for the next line. */
	fgoto main;
}
|
||||
|
||||
action push_back {
|
||||
scratch.push_back(fc);
|
||||
}
|
||||
|
||||
action push_back_escape {
|
||||
scratch.push_back(escape);
|
||||
scratch.push_back(fc);
|
||||
}
|
||||
|
||||
comment = '#' (any-nl)*;
|
||||
|
||||
escape_seq =
|
||||
escape
|
||||
(
|
||||
nl ${ /* esc newline */ line++; }
|
||||
|
|
||||
(any-nl) $push_back_escape
|
||||
)
|
||||
;
|
||||
|
||||
|
||||
# single-quoted string. only escape \n is special.
|
||||
# handling is so stupid I'm not going to support it.
|
||||
|
||||
sstring =
|
||||
['] $push_back
|
||||
( (any-nl-[']) $push_back )*
|
||||
['] $push_back
|
||||
$err{
|
||||
throw std::runtime_error("### MPW Shell - 's must occur in pairs.");
|
||||
}
|
||||
;
|
||||
|
||||
# same quoting logic as ' string
|
||||
vstring =
|
||||
'{' $push_back
|
||||
( (any-nl-'}') $push_back )*
|
||||
'}' $push_back
|
||||
$err{
|
||||
throw std::runtime_error("### MPW Shell - {s must occur in pairs.");
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
# double-quoted string.
|
||||
# escape \n is ignored. others do nothing.
|
||||
dstring =
|
||||
["] $push_back
|
||||
(
|
||||
escape_seq
|
||||
|
|
||||
vstring
|
||||
|
|
||||
(any-escape-nl-["{]) $push_back
|
||||
)* ["] $push_back
|
||||
$err{
|
||||
throw std::runtime_error("### MPW Shell - \"s must occur in pairs.");
|
||||
}
|
||||
;
|
||||
|
||||
# this is a mess ...
|
||||
coalesce_ws =
|
||||
ws
|
||||
(
|
||||
ws
|
||||
|
|
||||
escape nl ${ line++; }
|
||||
)*
|
||||
<:
|
||||
any ${ scratch.push_back(' '); fhold; }
|
||||
;
|
||||
|
||||
line :=
|
||||
(
|
||||
sstring
|
||||
|
|
||||
dstring
|
||||
|
|
||||
vstring
|
||||
|
|
||||
escape_seq
|
||||
|
|
||||
coalesce_ws
|
||||
|
|
||||
(any-escape-nl-ws-[#'"{]) $push_back
|
||||
)*
|
||||
comment?
|
||||
nl ${ line++; } $add_line
|
||||
;
|
||||
|
||||
main :=
|
||||
# strip leading whitespace.
|
||||
ws*
|
||||
<: # left guard -- higher priority to ws.
|
||||
any ${ fhold; fgoto line; }
|
||||
;
|
||||
|
||||
}%%
|
||||
|
||||
|
||||
|
||||
namespace {
|
||||
%% write data;
|
||||
}
|
||||
|
||||
/* construct the lexer; the ragel init code puts the machine in its initial state. */
phase1::phase1() {
	%% write init;
}
|
||||
|
||||
/*
 * Discard any partially assembled line and re-initialize the state machine.
 * The line counter is left untouched (open question from the original
 * author: line = 1?).
 */
void phase1::reset() {
	scratch.clear();
	%% write init;
}
|
||||
|
||||
/*
 * Run the phase1 state machine over the bytes in [begin, end).
 *
 * final -- when true, eof is set to the end of the buffer so the machine
 *          treats it as the end of all input; otherwise eof stays null and
 *          the machine state (cs) simply carries over to the next call.
 *
 * Throws std::runtime_error when the machine ends up in its error state.
 * The quoting rules in the grammar also throw from their own error actions.
 */
void phase1::process(const unsigned char *begin, const unsigned char *end, bool final) {

	const unsigned char *p = begin;
	const unsigned char *pe = end;
	const unsigned char *eof = final ? pe : nullptr;

	%% write exec;

	if (cs == main_error) {
		throw std::runtime_error("MPW Shell - Lexer error.");
	}

#if 0
	if (cs != main_start && final) {
		// will this happen?
		throw std::runtime_error("MPW Shell - Lexer error.");
	}
#endif
}
|
44
phase2.h
Normal file
44
phase2.h
Normal file
@ -0,0 +1,44 @@
|
||||
|
||||
#ifndef __phase2_h__
|
||||
#define __phase2_h__
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <functional>
|
||||
#include <memory>
|
||||
|
||||
typedef std::unique_ptr<struct command> command_ptr;
|
||||
typedef std::vector<command_ptr> command_ptr_vector;
|
||||
|
||||
class phase2 {
|
||||
|
||||
public:
|
||||
typedef std::function<void(command_ptr &&)> pipe_function;
|
||||
|
||||
void process(const std::string &line);
|
||||
void finish();
|
||||
|
||||
virtual void syntax_error();
|
||||
virtual void parse_accept();
|
||||
virtual void parse(int, std::string &&);
|
||||
|
||||
phase2 &operator >>=(pipe_function f) { pipe_to = f; return *this; }
|
||||
|
||||
private:
|
||||
|
||||
std::string scratch;
|
||||
int type = 0;
|
||||
bool error = false;
|
||||
bool immediate = false;
|
||||
|
||||
pipe_function pipe_to;
|
||||
|
||||
void flush();
|
||||
bool special();
|
||||
void classify();
|
||||
void exec();
|
||||
|
||||
command_ptr_vector command_queue;
|
||||
};
|
||||
|
||||
#endif
|
182
phase2.rl
Normal file
182
phase2.rl
Normal file
@ -0,0 +1,182 @@
|
||||
/*
|
||||
* phase2 -- parse a line into major control structures (begin/end/if/etc)
|
||||
* input is a full line -- comments have been removed, escape-nl handled, trailing newline stripped.
|
||||
*
|
||||
*/
|
||||
|
||||
#include "mpw-shell-grammar.h"
|
||||
#include "phase2.h"
|
||||
#include "command.h"
|
||||
|
||||
%%{
|
||||
machine main;
|
||||
alphtype unsigned char;
|
||||
|
||||
action not_special { !special() }
|
||||
|
||||
ws = [ \t];
|
||||
|
||||
main := |*
|
||||
'||' when not_special => {
|
||||
flush();
|
||||
parse(PIPE_PIPE, std::string(ts, te));
|
||||
};
|
||||
|
||||
'&&' when not_special => {
|
||||
flush();
|
||||
parse(AMP_AMP, std::string(ts, te));
|
||||
};
|
||||
|
||||
'(' when not_special => {
|
||||
flush();
|
||||
parse(LPAREN, std::string(ts, te));
|
||||
};
|
||||
|
||||
# ) may include redirection so start a new token but don't parse it yet.
|
||||
')' when not_special => {
|
||||
flush();
|
||||
scratch.push_back(fc);
|
||||
type = RPAREN;
|
||||
};
|
||||
|
||||
# todo -- also add in strings and escapes.
|
||||
|
||||
';' => { flush(); parse(SEMI, ";"); };
|
||||
ws => { if (!scratch.empty()) scratch.push_back(fc); };
|
||||
any => { scratch.push_back(fc); };
|
||||
*|;
|
||||
}%%
|
||||
|
||||
|
||||
%%{
|
||||
machine classify;
|
||||
alphtype unsigned char;
|
||||
|
||||
ws = [ \t];
|
||||
|
||||
IF = /if/i;
|
||||
ELSE = /else/i;
|
||||
END = /end/i;
|
||||
BEGIN = /begin/i;
|
||||
EVALUATE = /evaluate/i;
|
||||
|
||||
|
||||
main := |*
|
||||
IF %eof{ type = IF; return; };
|
||||
IF ws => { type = IF; return; };
|
||||
|
||||
ELSE %eof{ type = ELSE; return; };
|
||||
ELSE ws => { type = ELSE; return; };
|
||||
|
||||
ELSE ws+ IF %eof{ type = ELSE_IF; return; };
|
||||
ELSE ws+ IF ws => { type = ELSE_IF; return; };
|
||||
|
||||
EVALUATE %eof{ type = EVALUATE; return; };
|
||||
EVALUATE ws => { type = EVALUATE; return; };
|
||||
|
||||
END %eof{ type = END; return; };
|
||||
END ws => { type = END; return; };
|
||||
|
||||
BEGIN %eof{ type = BEGIN; return; };
|
||||
BEGIN ws => { type = BEGIN; return; };
|
||||
|
||||
')' => { type = LPAREN; return; };
|
||||
*|;
|
||||
|
||||
}%%
|
||||
|
||||
|
||||
namespace {
|
||||
%% machine classify;
|
||||
%% write data;
|
||||
%% machine main;
|
||||
%% write data;
|
||||
}
|
||||
|
||||
void phase2::flush() {
|
||||
// remove white space...
|
||||
while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back();
|
||||
|
||||
|
||||
if (!scratch.empty()) {
|
||||
if (!type) classify();
|
||||
parse(type, std::move(scratch));
|
||||
}
|
||||
|
||||
type = 0;
|
||||
scratch.clear();
|
||||
}
|
||||
|
||||
bool phase2::special() {
|
||||
if (!type) classify();
|
||||
|
||||
switch (type) {
|
||||
case IF:
|
||||
case ELSE:
|
||||
case ELSE_IF:
|
||||
case EVALUATE:
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
/*
 * Determine the token type of the text in scratch by running the classify
 * scanner over it. Does nothing when the type is already known or scratch
 * is empty; otherwise the type defaults to COMMAND unless a scanner rule
 * assigns something else.
 */
void phase2::classify() {
	// already classified, or nothing to look at.
	if (type || scratch.empty()) return;

	// variables used by the generated scanner code.
	const unsigned char *p = reinterpret_cast<const unsigned char *>(scratch.data());
	const unsigned char *pe = p + scratch.size();
	const unsigned char *eof = pe;
	const unsigned char *te, *ts;
	int cs;
	int act;

	// default when no keyword rule fires.
	type = COMMAND;

	%% machine classify;
	%% write init;
	%% write exec;
}
|
||||
|
||||
/*
 * Tokenize one complete line with the main scanner, flush the last pending
 * token, send an NL token to the parser and deliver any queued commands.
 */
void phase2::process(const std::string &line) {

	// every line starts with an empty, unclassified token.
	type = 0;
	scratch.clear();

	// variables used by the generated scanner code.
	const unsigned char *p = reinterpret_cast<const unsigned char *>(line.data());
	const unsigned char *pe = p + line.size();
	const unsigned char *eof = pe;
	const unsigned char *te, *ts;
	int cs;
	int act;

	%% machine main;
	%% write init;
	%% write exec;

	// whatever is left in scratch is the last token of the line.
	flush();
	parse(NL, "");

	exec();
}
|
||||
|
||||
/*
 * End of input: send token 0 with empty text to the parser (presumably the
 * parser's end-of-input token -- confirm against the grammar), then deliver
 * any queued commands.
 */
void phase2::finish() {
	parse(0, "");
	exec();
}
|
||||
|
||||
void phase2::exec() {
|
||||
|
||||
if (pipe_to) {
|
||||
for (auto &p : command_queue) {
|
||||
if (p) {
|
||||
pipe_to(std::move(p));
|
||||
}
|
||||
}
|
||||
command_queue.clear();
|
||||
}
|
||||
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user