diff --git a/CMakeLists.txt b/CMakeLists.txt index 876f679..964dbcb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -54,19 +54,14 @@ macro(RAGEL_TARGET Name Input Output) endmacro() - - - -#add_custom_command( -# OUTPUT mpw-shell-read.cpp -# COMMAND ragel -p -G2 -o mpw-shell-read.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-read.rl" -# MAIN_DEPENDENCY mpw-shell-read.rl -#) - -RAGEL_TARGET(mpw-shell-read mpw-shell-read.rl mpw-shell-read.cpp COMPILE_FLAGS "-p -G2") RAGEL_TARGET(phase1 phase1.rl phase1.cpp COMPILE_FLAGS "-p -G2") RAGEL_TARGET(phase2 phase2.rl phase2.cpp COMPILE_FLAGS "-p -G2") RAGEL_TARGET(pathnames pathnames.rl pathnames.cpp COMPILE_FLAGS "-p -G2") +RAGEL_TARGET(mpw-shell-token mpw-shell-token.rl mpw-shell-token.cpp COMPILE_FLAGS "-p -G2") +RAGEL_TARGET(mpw-shell-expand mpw-shell-expand.rl mpw-shell-expand.cpp COMPILE_FLAGS "-p -G2") +RAGEL_TARGET(mpw-shell-quote mpw-shell-quote.rl mpw-shell-quote.cpp COMPILE_FLAGS "-p -G2") +RAGEL_TARGET(value value.rl value.cpp COMPILE_FLAGS "-p -G2") + # need to copy all OUTPUT file to the build dir add_custom_command( @@ -78,40 +73,7 @@ add_custom_command( WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR} ) -add_custom_command( - OUTPUT mpw-shell-expand.cpp - COMMAND ragel -p -G2 -o mpw-shell-expand.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-expand.rl" - MAIN_DEPENDENCY mpw-shell-expand.rl -) -add_custom_command( - OUTPUT mpw-shell-token.cpp - COMMAND ragel -p -G2 -o mpw-shell-token.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-token.rl" - MAIN_DEPENDENCY mpw-shell-token.rl -) - -add_custom_command( - OUTPUT mpw-shell-command.cpp - COMMAND ragel -p -G2 -o mpw-shell-command.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-command.rl" - MAIN_DEPENDENCY mpw-shell-command.rl -) - -add_custom_command( - OUTPUT value.cpp - COMMAND ragel -p -G2 -o value.cpp "${CMAKE_CURRENT_SOURCE_DIR}/value.rl" - MAIN_DEPENDENCY value.rl -) - - - -add_custom_command( - OUTPUT mpw-shell-quote.cpp - COMMAND ragel -p -G2 -o mpw-shell-quote.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-quote.rl" - MAIN_DEPENDENCY mpw-shell-quote.rl -) - - -# mpw-shell-execute.cpp mpw-shell-builtins.cpp mpw-shell-read.cpp add_executable(mpw-shell mpw-shell.cpp mpw-shell-token.cpp mpw-shell-expand.cpp mpw-shell-parser.cpp value.cpp mpw-shell-quote.cpp phase1.cpp phase2.cpp phase2-parser.cpp command.cpp environment.cpp builtins.cpp diff --git a/mpw-shell-command.rl b/mpw-shell-command.rl deleted file mode 100644 index 8250090..0000000 --- a/mpw-shell-command.rl +++ /dev/null @@ -1,123 +0,0 @@ -#include -#include -#include -#include - -#include -#include - -#include "mpw-shell.h" - -%%{ - machine classify; - alphtype unsigned char; - - ws = [ \t]; - - IF = /if/i; - ELSE = /else/i; - END = /end/i; - EVALUATE = /evaluate/i; - - - main := |* - IF %eof{ return command_if; }; - IF ws => {return command_if; }; - - ELSE %eof{ return command_else;}; - ELSE ws => { return command_else; }; - - ELSE ws+ IF %eof{ return command_else_if; }; - ELSE ws+ IF ws => {return command_else_if; }; - - END %eof{ return command_end; }; - END ws => {return command_end; }; - - EVALUATE %eof{ return command_evaluate; }; - EVALUATE ws => {return command_evaluate; }; - - - *|; - -}%% - - -int classify(const std::string &line) { - - %% write data; - - int cs; - int act; - - const unsigned char *p = (const unsigned char *)line.data(); - const unsigned char *pe = (const unsigned char *)line.data() + line.size(); - const unsigned char *eof = pe; - const unsigned char *te, *ts; - - %%write init; - - %%write exec; - - return 0; -} - - -/* - * Generates a linked-list of commands. Why? Because it also checks - * for shell-special syntax (currently if / else /end only) and - * adds pointers to make executing them easier. - * - */ -command_ptr build_command(const std::vector &lines) { - - std::vector if_stack; - - command_ptr head; - command_ptr prev; - - for (const auto &line : lines) { - if (line.empty()) continue; - - int type = classify(line); - command_ptr c = std::make_shared(type, line); - - if (!head) head = c; - if (!prev) prev = c; - else { - prev->next = c; - prev = c; - } - - // if stack... - switch (type) { - case command_if: - if_stack.push_back(c); - break; - - case command_else: - case command_else_if: - - if (if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Else must be within if ... end."); - } - - if_stack.back()->alternate = c; - if_stack.back() = c; - break; - - case command_end: - if (if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Extra end command."); - } - if_stack.back()->alternate = c; - if_stack.pop_back(); - break; - } - } - - if (!if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Unterminated if command."); - } - - return head; -} diff --git a/mpw-shell-commands.c b/mpw-shell-commands.c deleted file mode 100644 index 922196e..0000000 --- a/mpw-shell-commands.c +++ /dev/null @@ -1,301 +0,0 @@ - -#line 1 "mpw-shell-commands.rl" -#include -#include -#include -#include - -#include - -typedef std::shared_ptr command_ptr; -typedef std::weak_ptr weak_command_ptr; - -class command { - enum type { - command_if = 1, - command_else, - command_else_if, - command_end - } = 0; - std::string line; - command_ptr next; - weak_command_ptr alternate; // if -> else -> end. -}; - - - -#line 49 "mpw-shell-commands.rl" - - - -int classify(const std::string &line) { - - -#line 35 "mpw-shell-commands.c" -static const int classify_start = 8; -static const int classify_first_final = 8; -static const int classify_error = 0; - -static const int classify_en_main = 8; - - -#line 55 "mpw-shell-commands.rl" - - int cs; - const unsigned char *p = (const unsigned char *)line.data(); - const unsigned char *pe = (const unsigned char *)line.data() + line.size(); - const unsigned char *eof = pe; - const unsigned char *te, *ts; - - -#line 52 "mpw-shell-commands.c" - { - cs = classify_start; - ts = 0; - te = 0; - act = 0; - } - -#line 63 "mpw-shell-commands.rl" - - -#line 63 "mpw-shell-commands.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -tr5: -#line 40 "mpw-shell-commands.rl" - {{p = ((te))-1;}{ return command_else; }} - goto st8; -tr13: -#line 39 "mpw-shell-commands.rl" - { return command_else;} -#line 39 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr14: -#line 39 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr16: -#line 40 "mpw-shell-commands.rl" - {te = p;p--;{ return command_else; }} - goto st8; -tr17: -#line 42 "mpw-shell-commands.rl" - { return command_else_if; } -#line 42 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr18: -#line 42 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr19: -#line 43 "mpw-shell-commands.rl" - {te = p+1;{return command_else_if; }} - goto st8; -tr20: -#line 45 "mpw-shell-commands.rl" - { return command_end; } -#line 45 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr21: -#line 45 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr22: -#line 46 "mpw-shell-commands.rl" - {te = p+1;{return command_end; }} - goto st8; -tr23: -#line 36 "mpw-shell-commands.rl" - { return command_if; } -#line 36 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr24: -#line 36 "mpw-shell-commands.rl" - {te = p;p--;} - goto st8; -tr25: -#line 37 "mpw-shell-commands.rl" - {te = p+1;{return command_if; }} - goto st8; -st8: -#line 1 "NONE" - {ts = 0;} - if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 1 "NONE" - {ts = p;} -#line 137 "mpw-shell-commands.c" - switch( (*p) ) { - case 69u: goto st1; - case 73u: goto st7; - case 101u: goto st1; - case 105u: goto st7; - } - goto st0; -st0: -cs = 0; - goto _out; -st1: - if ( ++p == pe ) - goto _test_eof1; -case 1: - switch( (*p) ) { - case 76u: goto st2; - case 78u: goto st6; - case 108u: goto st2; - case 110u: goto st6; - } - goto st0; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - switch( (*p) ) { - case 83u: goto st3; - case 115u: goto st3; - } - goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - switch( (*p) ) { - case 69u: goto st9; - case 101u: goto st9; - } - goto st0; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: - switch( (*p) ) { - case 9u: goto tr15; - case 32u: goto tr15; - } - goto tr14; -tr15: -#line 1 "NONE" - {te = p+1;} - goto st10; -st10: - if ( ++p == pe ) - goto _test_eof10; -case 10: -#line 194 "mpw-shell-commands.c" - switch( (*p) ) { - case 9u: goto st4; - case 32u: goto st4; - case 73u: goto st5; - case 105u: goto st5; - } - goto tr16; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: - switch( (*p) ) { - case 9u: goto st4; - case 32u: goto st4; - case 73u: goto st5; - case 105u: goto st5; - } - goto tr5; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - switch( (*p) ) { - case 70u: goto st11; - case 102u: goto st11; - } - goto tr5; -st11: - if ( ++p == pe ) - goto _test_eof11; -case 11: - switch( (*p) ) { - case 9u: goto tr19; - case 32u: goto tr19; - } - goto tr18; -st6: - if ( ++p == pe ) - goto _test_eof6; -case 6: - switch( (*p) ) { - case 68u: goto st12; - case 100u: goto st12; - } - goto st0; -st12: - if ( ++p == pe ) - goto _test_eof12; -case 12: - switch( (*p) ) { - case 9u: goto tr22; - case 32u: goto tr22; - } - goto tr21; -st7: - if ( ++p == pe ) - goto _test_eof7; -case 7: - switch( (*p) ) { - case 70u: goto st13; - case 102u: goto st13; - } - goto st0; -st13: - if ( ++p == pe ) - goto _test_eof13; -case 13: - switch( (*p) ) { - case 9u: goto tr25; - case 32u: goto tr25; - } - goto tr24; - } - _test_eof8: cs = 8; goto _test_eof; - _test_eof1: cs = 1; goto _test_eof; - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; - _test_eof10: cs = 10; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; - _test_eof11: cs = 11; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; - _test_eof12: cs = 12; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; - _test_eof13: cs = 13; goto _test_eof; - - _test_eof: {} - if ( p == eof ) - { - switch ( cs ) { - case 9: goto tr13; - case 10: goto tr16; - case 4: goto tr5; - case 5: goto tr5; - case 11: goto tr17; - case 12: goto tr20; - case 13: goto tr23; - } - } - - _out: {} - } - -#line 65 "mpw-shell-commands.rl" - - return 0; -} diff --git a/mpw-shell-read.rl b/mpw-shell-read.rl deleted file mode 100644 index e632db2..0000000 --- a/mpw-shell-read.rl +++ /dev/null @@ -1,379 +0,0 @@ -#include "mpw-shell.h" - -#include -#include -#include -#include - -%%{ - machine classify; - alphtype unsigned char; - - ws = [ \t]; - - IF = /if/i; - ELSE = /else/i; - END = /end/i; - BEGIN = /begin/i; - EVALUATE = /evaluate/i; - - - main := |* - IF %eof{ return command_if; }; - IF ws => {return command_if; }; - - ELSE %eof{ return command_else;}; - ELSE ws => { return command_else; }; - - ELSE ws+ IF %eof{ return command_else_if; }; - ELSE ws+ IF ws => {return command_else_if; }; - - END %eof{ return command_end; }; - END ws => {return command_end; }; - - BEGIN %eof{ return command_begin; }; - BEGIN ws => {return command_begin; }; - - EVALUATE %eof{ return command_evaluate; }; - EVALUATE ws => {return command_evaluate; }; - - *|; - -}%% - - -static int classify(const std::string &line) { - - %%machine classify; - %% write data; - - int cs; - int act; - - const unsigned char *p = (const unsigned char *)line.data(); - const unsigned char *pe = (const unsigned char *)line.data() + line.size(); - const unsigned char *eof = pe; - const unsigned char *te, *ts; - - %%write init; - - %%write exec; - - return 0; -} - - -/* - * this state machine splits input into lines. - * only new-line escapes are removed. - * "", '', and {} are also matched. - * - */ - -/* - * from experimentation, mpw splits on ; after variable expansion; - * this splits before. something stupid like: - * set q '"'; echo {q} ; " - * will not be handled correctly. oh well. - * (should probably just drop that and we can then combine tokenizing w/ - * variable expansion) - */ -%%{ - machine line_parser; - alphtype unsigned char; - - - escape = 0xb6; - ws = [ \t]; - nl = ('\n' | '\r'); - - action add_line { - /* strip trailing ws */ - while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back(); - if (!scratch.empty()) { - command_ptr cmd = std::make_shared(std::move(scratch)); - cmd->line = start_line; - start_line = line; - program.emplace_back(std::move(cmd)); - } - scratch.clear(); - fgoto main; - } - - action push_back { - scratch.push_back(fc); - } - - action push_back_escape { - scratch.push_back(escape); - scratch.push_back(fc); - } - - comment = '#' (any-nl)*; - - escape_seq = - escape - ( - nl ${ /* esc newline */ line++; } - | - (any-nl) $push_back_escape - ) - ; - - - # single-quoted string. only escape \n is special. - # handling is so stupid I'm not going to support it. - - sstring = - ['] $push_back - ( (any-nl-[']) $push_back )* - ['] $push_back - $err{ - throw std::runtime_error("### MPW Shell - 's must occur in pairs."); - } - ; - - # same quoting logic as ' string - vstring = - '{' $push_back - ( (any-nl-'}') $push_back )* - '}' $push_back - $err{ - throw std::runtime_error("### MPW Shell - {s must occur in pairs."); - } - ; - - - # double-quoted string. - # escape \n is ignored. others do nothing. - dstring = - ["] $push_back - ( - escape_seq - | - vstring - | - (any-escape-nl-["{]) $push_back - )* ["] $push_back - $err{ - throw std::runtime_error("### MPW Shell - \"s must occur in pairs."); - } - ; - - # this is a mess ... - coalesce_ws = - ws - ( - ws - | - escape nl ${ line++; } - )* - <: - any ${ scratch.push_back(' '); fhold; } - ; - - line := - ( - sstring - | - dstring - | - vstring - | - [;] $add_line - | - escape_seq - | - coalesce_ws - | - (any-escape-nl-ws-[;#'"{]) $push_back - )* - comment? - nl ${ line++; } $add_line - ; - - main := - # strip leading whitespace. - ws* - <: # left guard -- higher priority to ws. - any ${ fhold; fgoto line; } - ; - -}%% - - - - - - -class line_parser { - - public: - - void process(const void *data, size_t size) { - process((const unsigned char *)data, size, false); - } - - command_ptr finish() { - process((const unsigned char *)"\n\n", 2, true); - return build_program(); - } - - line_parser(); - - private: - - %% machine line_parser; - %% write data; - - - std::vector program; - std::string scratch; - int line = 1; - int cs; - - command_ptr build_program(); - void process(const unsigned char *data, size_t size, bool final); -}; - -line_parser::line_parser() { - %% machine line_parser; - %% write init; -} - -void line_parser::process(const unsigned char *data, size_t size, bool final) { - - int start_line; - - const unsigned char *p = data; - const unsigned char *pe = data + size; - const unsigned char *eof = nullptr; - - if (final) - eof = pe; - - start_line = line; - %% machine line_parser; - %% write exec; - - if (cs == line_parser_error) { - throw std::runtime_error("MPW Shell - Lexer error."); - - } - - if (cs != line_parser_start && final) { - // will this happen? - throw std::runtime_error("MPW Shell - Lexer error."); - } -} - - -/* - * Generates a linked-list of commands. Why? Because it also checks - * for shell-special syntax (currently if / else /end only) and - * adds pointers to make executing them easier. - * - */ - -// todo -- use recursive descent parser, support begin/end, (), ||, &&, etc. -command_ptr line_parser::build_program() { - - - std::vector if_stack; - - command_ptr head; - command_ptr ptr; - - if (program.empty()) return head; - - std::reverse(program.begin(), program.end()); - - head = program.back(); - - while (!program.empty()) { - - if (ptr) ptr->next = program.back(); - - ptr = std::move(program.back()); - program.pop_back(); - - int type = ptr->type = classify(ptr->string); - - ptr->level = if_stack.size(); - - // if stack... - switch (type) { - default: - break; - - case command_if: - if_stack.push_back(ptr); - break; - - case command_else: - case command_else_if: - - if (if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Else must be within if ... end."); - } - - ptr->level--; - if_stack.back()->alternate = ptr; - if_stack.back() = ptr; - break; - - case command_end: - if (if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Extra end command."); - } - - ptr->level--; - if_stack.back()->alternate = ptr; - if_stack.pop_back(); - break; - } - } - - if (!if_stack.empty()) { - throw std::runtime_error("### MPW Shell - Unterminated if command."); - } - - return head; -} - - -command_ptr read_fd(int fd) { - unsigned char buffer[1024]; - - line_parser p; - - for(;;) { - ssize_t s = read(fd, buffer, sizeof(buffer)); - if (s < 0) { - if (errno == EINTR) continue; - throw std::runtime_error("MPW Shell - Read error."); - } - if (s == 0) break; - p.process(buffer, s); - } - return p.finish(); -} - -command_ptr read_file(const std::string &name) { - int fd; - fd = open(name.c_str(), O_RDONLY); - if (fd < 0) { - throw std::runtime_error("MPW Shell - Unable to open file " + name + "."); - } - - auto tmp = read_fd(fd); - close(fd); - return tmp; -} - -command_ptr read_string(const std::string &s) { - line_parser p; - - p.process(s.data(), s.size()); - return p.finish(); -}