mpw-shell/phase1.rl
2016-01-29 22:23:14 -05:00

181 lines
2.8 KiB
Ragel

/*
* phase1 -- strip comments and merge multi-lines.
*
*/
#include "phase1.h"
#include <cctype>
#include <stdexcept>
#include <stdint.h>
/*
 * Escape byte used by the host-code actions (see push_back_escape).
 * Must stay in sync with the `escape = 0xb6;` definition inside the
 * Ragel machine below.
 * NOTE(review): presumably the MacRoman code for the MPW escape
 * character -- confirm.
 */
const unsigned char escape = 0xb6;
/*
* From experimentation, MPW splits on ';' after variable expansion,
* whereas this code splits before expansion. Something stupid like:
* set q '"'; echo {q} ; "
* will therefore not be handled correctly. Oh well.
* (Should probably just drop the early split; tokenizing could then be
* combined with variable expansion.)
*/
%%{
machine main;
# input symbols are raw bytes, so 0xb6 is a valid member of the alphabet.
alphtype unsigned char;
# escape byte; mirrors the C++ constant `escape` defined before this block.
escape = 0xb6;
# horizontal whitespace: space or tab.
ws = [ \t];
# line terminator: a single LF or a single CR, each matched as one character.
nl = ('\n' | '\r');
# end of a logical line: trim trailing whitespace from the accumulated text,
# hand any non-empty result to pipe_to (when one is set), then resume
# scanning in the `main` machine.
action add_line {
/*
 * strip trailing ws.
 * the cast matters: scratch can hold bytes >= 0x80 (the 0xb6 escape or any
 * other high byte), and calling isspace with a negative char value is
 * undefined behavior.
 */
while (!scratch.empty() && std::isspace(static_cast<unsigned char>(scratch.back()))) scratch.pop_back();
if (!scratch.empty()) {
/* move the text out and leave scratch in a known-empty state. */
std::string tmp = std::move(scratch);
scratch.clear();
if (pipe_to) pipe_to(std::move(tmp));
}
fgoto main;
}
# append the current input character (fc) to the pending line.
action push_back {
scratch.push_back(fc);
}
# append the escape byte followed by the current input character (fc), so an
# escape pair is kept intact in the pending line.
action push_back_escape {
scratch.push_back(escape);
scratch.push_back(fc);
}
# a '#' followed by everything up to (but not including) the newline.
# no action is attached, so the comment text is never copied to the output.
comment = '#' (any-nl)*;
# escape byte followed by one character:
#  - escape + newline: nothing is emitted and the line counter is bumped,
#    so the logical line continues on the next physical line.
#  - escape + anything else: both bytes are emitted via push_back_escape.
escape_seq =
escape
(
nl ${ /* esc newline */ line++; }
|
(any-nl) $push_back_escape
)
;
# single-quoted string: opening ', any run of characters other than newline
# and ', closing '. every character, including both quotes, is copied to the
# output.
# per the original author, only escape + newline is special inside single
# quotes; that case is deliberately not supported here, so the escape byte is
# copied like any other character and a newline inside the string is not
# accepted.
# NOTE(review): the $err action is written after the closing-quote
# sub-machine; presumably that is enough to report an unterminated string
# (newline or end of input before the closing quote) -- confirm against the
# generated state machine.
sstring =
['] $push_back
( (any-nl-[']) $push_back )*
['] $push_back
$err{
throw std::runtime_error("### MPW Shell - 's must occur in pairs.");
}
;
# {variable} reference: same quoting logic as the single-quoted string, with
# { and } as the delimiters. every character, including both braces, is
# copied to the output; a newline before the closing } is not accepted.
# NOTE(review): as with sstring, the $err action is written after the
# closing-delimiter sub-machine -- confirm it covers every unterminated case.
vstring =
'{' $push_back
( (any-nl-'}') $push_back )*
'}' $push_back
$err{
throw std::runtime_error("### MPW Shell - {s must occur in pairs.");
}
;
# double-quoted string.
# the body is any mix of:
#  - escape sequences (escape + newline is dropped, every other escape pair
#    is kept verbatim -- see escape_seq),
#  - nested {variable} references (vstring),
#  - ordinary characters (anything except escape, newline, " and {).
# both quote characters are copied to the output; an unescaped newline inside
# the string is not accepted.
# NOTE(review): the $err action is written after the closing-quote
# sub-machine -- confirm it covers every unterminated case.
dstring =
["] $push_back
(
escape_seq
|
vstring
|
(any-escape-nl-["{]) $push_back
)* ["] $push_back
$err{
throw std::runtime_error("### MPW Shell - \"s must occur in pairs.");
}
;
# this is a mess ...
# collapse a run of whitespace into a single space: one ws character,
# followed by any number of further ws characters or escape + newline pairs
# (each of which bumps the line counter). the space is pushed only when the
# first character after the run arrives; that character is held (fhold) so
# it is scanned again by the enclosing machine.
# `<:` (left-guarded concatenation) gives the run priority over the trailing
# `any`, so whitespace is always absorbed into the run.
# NOTE(review): because the run has priority, an escape byte after whitespace
# appears to commit to the `escape nl` branch; whitespace followed by escape +
# a non-newline character may then reach the error state -- verify.
coalesce_ws =
ws
(
ws
|
escape nl ${ line++; }
)*
<:
any ${ scratch.push_back(' '); fhold; }
;
# one logical line: any sequence of quoted strings, {variable} references,
# escape sequences, whitespace runs and ordinary characters, then an optional
# trailing comment, then the terminating newline.
# ordinary characters are everything except escape, newline, whitespace and
# the characters that start one of the other alternatives (# ' " {).
# the terminating newline bumps the line counter and runs add_line, which
# delivers the accumulated text and jumps back to `main`.
line :=
(
sstring
|
dstring
|
vstring
|
escape_seq
|
coalesce_ws
|
(any-escape-nl-ws-[#'"{]) $push_back
)*
comment?
nl ${ line++; } $add_line
;
# entry point (and the state add_line returns to after every line).
# skips leading whitespace without emitting anything; the first other
# character is held (fhold) and control jumps to `line`, which scans it again.
main :=
# strip leading whitespace.
ws*
<: # left guard -- higher priority to ws.
any ${ fhold; fgoto line; }
;
}%%
/*
 * The Ragel `write data` directive expands to the generated state-machine
 * data; the anonymous namespace keeps it local to this translation unit.
 */
namespace {
%% write data;
}
/*
 * Construct the lexer with the state machine in its initial state
 * (`write init` expands to the Ragel-generated initialization code).
 */
phase1::phase1() {
%% write init;
}
/*
 * Return the lexer to a clean state: discard any partially accumulated
 * line and re-run the Ragel-generated state-machine initialization.
 * The line counter is left untouched (open question from the original
 * author, see the note in the body).
 */
void phase1::reset() {
/* drop pending text first; this is independent of the machine state. */
scratch.clear();
%% write init;
// line = 1?
}
/*
 * Run one chunk of input through the phase1 state machine.
 *
 * begin, end -- half-open byte range [begin, end) to scan.
 * final      -- true when this is the last chunk; the machine is then told
 *               where the end of input is (eof == pe).
 *
 * Completed lines are delivered from the add_line action while the machine
 * executes. Throws std::runtime_error if the machine ends up in its error
 * state; the string error actions in the grammar throw std::runtime_error
 * as well.
 *
 * NOTE(review): text still pending in scratch when the final chunk ends
 * without a newline does not appear to be flushed here -- confirm whether
 * callers are expected to terminate their input with a newline.
 */
void phase1::process(const unsigned char *begin, const unsigned char *end, bool final) {
/* p / pe / eof are the variable names the Ragel-generated code expects. */
const unsigned char *p = begin;
const unsigned char *pe = end;
/* eof stays null for intermediate chunks so the machine can be resumed. */
const unsigned char *eof = final ? pe : nullptr;
%% write exec;
if (cs == main_error) {
throw std::runtime_error("MPW Shell - Lexer error.");
}
#if 0
if (cs != main_start && final) {
// will this happen?
throw std::runtime_error("MPW Shell - Lexer error.");
}
#endif
}