commit 7034e1193e204c4a3579035408b040f3f44842b2 Author: Kelvin Sherlock Date: Wed Jan 27 10:43:34 2016 -0500 initial version diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..7569349 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +.o +build/ diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..77fb185 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,51 @@ +set(CMAKE_CXX_COMPILER "clang++") +set(CMAKE_CXX_FLAGS "-std=c++14 -stdlib=libc++ -g -Wall -Wno-unused-const-variable -Wno-unused-variable -Wno-multichar -Wno-c++11-extensions") + +project("mpw-shell") +cmake_minimum_required(VERSION 2.6) + +add_definitions(-I ${CMAKE_SOURCE_DIR}/) + + +add_custom_command( + OUTPUT mpw-shell-read.cpp + COMMAND ragel -p -G2 -o mpw-shell-read.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-read.rl" + MAIN_DEPENDENCY mpw-shell-read.rl +) + +add_custom_command( + OUTPUT mpw-shell-expand.cpp + COMMAND ragel -p -G2 -o mpw-shell-expand.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-expand.rl" + MAIN_DEPENDENCY mpw-shell-expand.rl +) + +add_custom_command( + OUTPUT mpw-shell-token.cpp + COMMAND ragel -p -G2 -o mpw-shell-token.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-token.rl" + MAIN_DEPENDENCY mpw-shell-token.rl +) + +add_custom_command( + OUTPUT mpw-shell-command.cpp + COMMAND ragel -p -G2 -o mpw-shell-command.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-command.rl" + MAIN_DEPENDENCY mpw-shell-command.rl +) + +add_custom_command( + OUTPUT value.cpp + COMMAND ragel -p -G2 -o value.cpp "${CMAKE_CURRENT_SOURCE_DIR}/value.rl" + MAIN_DEPENDENCY value.rl +) + +add_custom_command( + OUTPUT mpw-shell-quote.cpp + COMMAND ragel -p -G2 -o mpw-shell-quote.cpp "${CMAKE_CURRENT_SOURCE_DIR}/mpw-shell-quote.rl" + MAIN_DEPENDENCY mpw-shell-quote.rl +) + + + + +add_executable(mpw-shell mpw-shell.cpp mpw-shell-read.cpp mpw-shell-token.cpp mpw-shell-expand.cpp + mpw-shell-execute.cpp mpw-shell-builtins.cpp mpw-shell-parser.cpp value.cpp mpw-shell-quote.cpp) + diff --git a/command.h b/command.h new file mode 100644 index 0000000..5876b7b --- /dev/null +++ b/command.h @@ -0,0 +1,44 @@ +#include +#include +#include + +typedef std::unique_ptr command_ptr; +typedef std::vector command_ptr_vector; +typedef std::array command_ptr_pair; + +struct command { + enum { + + }; + int type; + virtual ~command(); + virtual int run(); +}; + +struct simple_command : public command { + std::string text; +}; + +struct binary_command : public command { + command_ptr_pair children; +}; + +struct or_command : public binary_command { + +}; + +struct and_command : public binary_command { + +}; + +struct begin_command : public command { + command_ptr_vector children; + std::string end; +}; + +struct if_command : public command { + std::string begin; + command_ptr_vector children; + command_ptr_vector else_clause; + std::string end; +}; diff --git a/fdset.h b/fdset.h new file mode 100644 index 0000000..ac519b6 --- /dev/null +++ b/fdset.h @@ -0,0 +1,155 @@ +#ifndef __fdset__ +#define __fdset__ + +#include +#include +#include +#include + +#include + +class fdset; +class fdmask; + +/* + * fdmask does not own the file descriptors and will not close them. + * + */ +class fdmask { + public: + + fdmask() = default; + fdmask(const fdmask &) = default; + fdmask(fdmask &&) = default; + + fdmask(const std::array &rhs) : _fds(rhs) + {} + +#if 0 + fdmask(std::initializer_list rhs) : _fds(rhs) + {} +#endif + + fdmask &operator=(const fdmask &) = default; + fdmask &operator=(fdmask &&) = default; + + fdmask &operator=(const std::array &rhs) { + _fds = rhs; + return *this; + } + +#if 0 + fdmask &operator=(std::initializer_list rhs) { + _fds = rhs; + } +#endif + + void dup() const { + // dup fds to stdin/stdout/stderr. + // called after fork, before exec. + + + #define __(index, target) \ + if (_fds[index] >= 0 && _fds[index] != target) dup2(_fds[index], target) + + __(0, STDIN_FILENO); + __(1, STDOUT_FILENO); + __(2, STDERR_FILENO); + + #undef __ + } + + + int operator[](unsigned index) const { + return _fds[index]; + } + + fdmask &operator|=(const fdmask &rhs) { + for (unsigned i = 0; i < 3; ++i) { + if (_fds[i] < 0) _fds[i] = rhs._fds[i]; + } + return *this; + } + + private: + friend class fdset; + std::array _fds = {{ -1, -1, -1 }}; +}; + +/* + * fd set owns it's descriptors and will close them. + * + * + */ + +class fdset { + public: + + fdset() = default; + fdset(const fdset &) = delete; + fdset(fdset && rhs) { + std::swap(rhs._fds, _fds); + } + + ~fdset() { + close(); + } + + fdset &operator=(const fdset &) = delete; + fdset &operator=(fdset &&rhs) { + if (&rhs != this) { + std::swap(_fds, rhs._fds); + rhs.close(); + } + return *this; + } + + void close(void) { + for (int &fd : _fds) { + if (fd >= 0) { + ::close(fd); + fd = -1; + } + } + } + + void set(int index, int fd) { + std::swap(fd, _fds[index]); + if (fd >= 0) ::close(fd); + } + + fdmask to_mask() const { + return fdmask(_fds); + } + + private: + + void reset() { + _fds = {{ -1, -1, -1 }}; + } + + + + std::array _fds = {{ -1, -1, -1 }}; + +}; + +inline fdmask operator|(const fdmask &lhs, const fdmask &rhs) { + fdmask tmp(lhs); + tmp |= rhs; + return tmp; +} + +inline fdmask operator|(const fdset &lhs, const fdmask &rhs) { + fdmask tmp(lhs.to_mask()); + tmp |= rhs; + return tmp; +} + +struct process { + std::vector arguments; + fdset fds; +}; + + +#endif diff --git a/mpw-shell-builtins.cpp b/mpw-shell-builtins.cpp new file mode 100644 index 0000000..c73f078 --- /dev/null +++ b/mpw-shell-builtins.cpp @@ -0,0 +1,506 @@ +#include "mpw-shell.h" + +#include "fdset.h" +#include "value.h" + +#include +#include +#include + +#include +#include + +namespace { + + + std::string &lowercase(std::string &s) { + std::transform(s.begin(), s.end(), s.begin(), [](char c){ return std::tolower(c); }); + return s; + } + + // doesn't handle flag arguments but builtins don't have arguments. + + template + std::vector getopt(const std::vector &argv, FX fx) { + + std::vector out; + out.reserve(argv.size()); + + std::copy_if(argv.begin()+1, argv.end(), std::back_inserter(out), [&fx](const std::string &s){ + + if (s.empty()) return false; // ? + if (s.front() == '-') { + std::for_each(s.begin() + 1, s.end(), fx); + return false; + } + return true; + }); + + return out; + } + + + + + /* + * the fdopen() will assume ownership of the fd and close it. + * this is not desirable. + */ + + int readfn(void *cookie, char *buffer, int size) { + return ::read((int)(ptrdiff_t)cookie, buffer, size); + } + + int writefn(void *cookie, const char *buffer, int size) { + return ::write((int)(ptrdiff_t)cookie, buffer, size); + } + + + FILE *file_stream(int index, int fd) { + if (fd < 0) { + switch (index) { + case 0: return stdin; + case 1: return stdout; + case 2: return stderr; + default: + return stderr; + } + } + // will not close. + return funopen((const void *)(ptrdiff_t)fd, readfn, writefn, nullptr, nullptr); + } + + + class io_helper { + + public: + FILE *in; + FILE *out; + FILE *err; + + io_helper(const fdmask &fds) { + in = file_stream(0, fds[0]); + out = file_stream(1, fds[1]); + err = file_stream(2, fds[2]); + } + + ~io_helper() { + #define __(x, target) if (x != target) fclose(x) + __(in, stdin); + __(out, stdout); + __(err, stderr); + #undef __ + } + + io_helper() = delete; + io_helper(const io_helper &) = delete; + io_helper &operator=(const io_helper &) = delete; + }; + + +} + +#undef stdin +#undef stdout +#undef stderr + +#define stdin io.in +#define stdout io.out +#define stderr io.err + +int builtin_unset(const std::vector &tokens, const fdmask &) { + for (auto iter = tokens.begin() + 1; iter != tokens.end(); ++iter) { + + std::string name = *iter; + lowercase(name); + + Environment.erase(name); + } + // unset [no arg] removes ALL variables + if (tokens.size() == 1) { + Environment.clear(); + } + return 0; +} + + +int builtin_set(const std::vector &tokens, const fdmask &fds) { + // set var name -- set + // set var -- just print the value + + // 3.5 supports -e to also export it. + + io_helper io(fds); + + + if (tokens.size() == 1) { + + for (const auto &kv : Environment) { + std::string name = quote(kv.first); + std::string value = quote(kv.second); + + fprintf(stdout, "Set %s%s %s\n", + bool(kv.second) ? "-e " : "", + name.c_str(), value.c_str()); + } + return 0; + } + + if (tokens.size() == 2) { + std::string name = tokens[1]; + lowercase(name); + auto iter = Environment.find(name); + if (iter == Environment.end()) { + fprintf(stderr, "### Set - No variable definition exists for %s.\n", name.c_str()); + return 2; + } + + name = quote(name); + std::string value = quote(iter->second); + fprintf(stdout, "Set %s%s %s\n", + bool(iter->second) ? "-e " : "", + name.c_str(), value.c_str()); + return 0; + } + + bool exported = false; + + + if (tokens.size() == 4 && tokens[1] == "-e") { + exported = true; + } + + if (tokens.size() > 3 && !exported) { + fputs("### Set - Too many parameters were specified.\n", stderr); + fputs("# Usage - set [name [value]]\n", stderr); + return 1; + } + + std::string name = tokens[1+exported]; + std::string value = tokens[2+exported]; + lowercase(name); + + Environment[name] = std::move(EnvironmentEntry(std::move(value), exported)); + return 0; +} + + + +static int export_common(bool export_or_unexport, const std::vector &tokens, io_helper &io) { + + const char *name = export_or_unexport ? "Export" : "Unexport"; + + struct { + int _r = 0; + int _s = 0; + } flags; + bool error = false; + + std::vector argv = getopt(tokens, [&](char c){ + switch(c) { + case 'r': + case 'R': + flags._r = true; + break; + case 's': + case 'S': + flags._s = true; + break; + default: + fprintf(stderr, "### %s - \"-%c\" is not an option.\n", name, c); + error = true; + break; + } + }); + + if (error) { + fprintf(stderr, "# Usage - %s [-r | -s | name...]\n", name); + return 1; + } + + if (argv.empty()) { + if (flags._r && flags._s) goto conflict; + + // list of exported vars. + // -r will generate unexport commands for exported variables. + // -s will only print the names. + + + name = export_or_unexport ? "Export " : "Unexport "; + + for (const auto &kv : Environment) { + const std::string& vname = kv.first; + if (kv.second == export_or_unexport) + fprintf(stdout, "%s%s\n", flags._s ? "" : name, quote(vname).c_str()); + } + return 0; + } + else { + // mark as exported. + + if (flags._r || flags._s) goto conflict; + + for (std::string s : argv) { + lowercase(s); + auto iter = Environment.find(s); + if (iter != Environment.end()) iter->second = export_or_unexport; + } + return 0; + } + +conflict: + fprintf(stderr, "### %s - Conflicting options or parameters were specified.\n", name); + fprintf(stderr, "# Usage - %s [-r | -s | name...]\n", name); + return 1; +} +int builtin_export(const std::vector &tokens, const fdmask &fds) { + + io_helper io(fds); + return export_common(true, tokens, io); +} + +int builtin_unexport(const std::vector &tokens, const fdmask &fds) { + + io_helper io(fds); + return export_common(false, tokens, io); +} + + + +int builtin_echo(const std::vector &tokens, const fdmask &fds) { + + io_helper io(fds); + + bool space = false; + bool n = false; + + for (auto iter = tokens.begin() + 1; iter != tokens.end(); ++iter) { + + const std::string &s = *iter; + if (s == "-n" || s == "-N") { + n = true; + continue; + } + if (space) { + fputs(" ", stdout); + } + fputs(s.c_str(), stdout); + space = true; + } + if (!n) fputs("\n", stdout); + return 0; +} + +int builtin_quote(const std::vector &tokens, const fdmask &fds) { + // todo... + + io_helper io(fds); + + bool space = false; + bool n = false; + + for (auto iter = tokens.begin() + 1; iter != tokens.end(); ++iter) { + + std::string s = *iter; + if (s == "-n" || s == "-N") { + n = true; + continue; + } + if (space) { + fputs(" ", stdout); + } + s = quote(std::move(s)); + fputs(s.c_str(), stdout); + space = true; + } + if (!n) fputs("\n", stdout); + return 0; +} + +int builtin_parameters(const std::vector &argv, const fdmask &fds) { + + io_helper io(fds); + + int i = 0; + for (const auto &s : argv) { + fprintf(stdout, "{%d} %s\n", i++, s.c_str()); + } + return 0; +} + + +int builtin_directory(const std::vector &tokens, const fdmask &fds) { + // directory [-q] + // directory path + + // for relative names, uses {DirectoryPath} (if set) rather than . + // set DirectoryPath ":,{MPW},{MPW}Projects:" + + io_helper io(fds); + + bool q = false; + bool error = false; + + std::vector argv = getopt(tokens, [&](char c){ + switch(c) + { + case 'q': + case 'Q': + q = true; + break; + default: + fprintf(stderr, "### Directory - \"-%c\" is not an option.\n", c); + error = true; + break; + } + }); + + if (error) { + fputs("# Usage - Directory [-q | directory]\n", stderr); + return 1; + } + + if (argv.size() > 1) { + fputs("### Directory - Too many parameters were specified.\n", stderr); + fputs("# Usage - Directory [-q | directory]\n", stderr); + return 1; + } + + + if (argv.size() == 1) { + //cd + if (q) { + fputs("### Directory - Conflicting options or parameters were specified.\n", stderr); + return 1; + } + + return 0; + } + else { + // pwd + return 0; + } +} + +static bool is_assignment(int type) { + switch(type) + { + case '=': + case '+=': + case '-=': + return true; + default: + return false; + } +} + +int builtin_evaluate(std::vector &&tokens, const fdmask &fds) { + // evaluate expression + // evaluate variable = expression + // evaluate variable += expression + // evaluate variable -= expression + + // flags -- -h -o -b -- print in hex, octal, or binary + + // convert the arguments to a stack. + + + int output = 'd'; + + io_helper io(fds); + + std::reverse(tokens.begin(), tokens.end()); + + // remove 'Evaluate' + tokens.pop_back(); + + // check for -h -x -o + if (tokens.size() >= 2 && tokens.back().type == '-') { + + const token &t = tokens[tokens.size() - 2]; + if (t.type == token::text && t.string.length() == 1) { + int flag = tolower(t.string[0]); + switch(flag) { + case 'o': + case 'h': + case 'b': + output = flag; + tokens.pop_back(); + tokens.pop_back(); + } + } + + } + + if (tokens.size() >= 2 && tokens.back().type == token::text) + { + int type = tokens[tokens.size() -2].type; + + if (is_assignment(type)) { + + std::string name = tokens.back().string; + lowercase(name); + + tokens.pop_back(); + tokens.pop_back(); + + int32_t i = evaluate_expression("Evaluate", std::move(tokens)); + + switch(type) { + case '=': + Environment[name] = std::to_string(i); + break; + case '+=': + case '-=': + { + value old; + auto iter = Environment.find(name); + if (iter != Environment.end()) old = (const std::string &)iter->second; + + switch(type) { + case '+=': + i = old.to_number() + i; + break; + case '-=': + i = old.to_number() - i; + break; + } + + std::string s = std::to_string(i); + if (iter == Environment.end()) + Environment.emplace(std::move(name), std::move(s)); + else iter->second = std::move(s); + + } + break; + } + return 0; + } + } + + int32_t i = evaluate_expression("Evaluate", std::move(tokens)); + + // todo -- format based on -h, -o, or -b flag. + if (output == 'h') { + fprintf(stdout, "0x%08x\n", i); + return 0; + } + if (output == 'b') { + fputc('0', stdout); + fputc('b', stdout); + for (int j = 0; j < 32; ++j) { + fputc(i & 0x80000000 ? '1' : '0', stdout); + i <<= 1; + } + fputc('\n', stdout); + return 0; + + } + if (output == 'o') { + // octal. + fprintf(stdout, "0%o\n", i); + return 0; + } + fprintf(stdout, "%d\n", i); + return 0; +} diff --git a/mpw-shell-command.rl b/mpw-shell-command.rl new file mode 100644 index 0000000..8250090 --- /dev/null +++ b/mpw-shell-command.rl @@ -0,0 +1,123 @@ +#include +#include +#include +#include + +#include +#include + +#include "mpw-shell.h" + +%%{ + machine classify; + alphtype unsigned char; + + ws = [ \t]; + + IF = /if/i; + ELSE = /else/i; + END = /end/i; + EVALUATE = /evaluate/i; + + + main := |* + IF %eof{ return command_if; }; + IF ws => {return command_if; }; + + ELSE %eof{ return command_else;}; + ELSE ws => { return command_else; }; + + ELSE ws+ IF %eof{ return command_else_if; }; + ELSE ws+ IF ws => {return command_else_if; }; + + END %eof{ return command_end; }; + END ws => {return command_end; }; + + EVALUATE %eof{ return command_evaluate; }; + EVALUATE ws => {return command_evaluate; }; + + + *|; + +}%% + + +int classify(const std::string &line) { + + %% write data; + + int cs; + int act; + + const unsigned char *p = (const unsigned char *)line.data(); + const unsigned char *pe = (const unsigned char *)line.data() + line.size(); + const unsigned char *eof = pe; + const unsigned char *te, *ts; + + %%write init; + + %%write exec; + + return 0; +} + + +/* + * Generates a linked-list of commands. Why? Because it also checks + * for shell-special syntax (currently if / else /end only) and + * adds pointers to make executing them easier. + * + */ +command_ptr build_command(const std::vector &lines) { + + std::vector if_stack; + + command_ptr head; + command_ptr prev; + + for (const auto &line : lines) { + if (line.empty()) continue; + + int type = classify(line); + command_ptr c = std::make_shared(type, line); + + if (!head) head = c; + if (!prev) prev = c; + else { + prev->next = c; + prev = c; + } + + // if stack... + switch (type) { + case command_if: + if_stack.push_back(c); + break; + + case command_else: + case command_else_if: + + if (if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Else must be within if ... end."); + } + + if_stack.back()->alternate = c; + if_stack.back() = c; + break; + + case command_end: + if (if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Extra end command."); + } + if_stack.back()->alternate = c; + if_stack.pop_back(); + break; + } + } + + if (!if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Unterminated if command."); + } + + return head; +} diff --git a/mpw-shell-commands.c b/mpw-shell-commands.c new file mode 100644 index 0000000..922196e --- /dev/null +++ b/mpw-shell-commands.c @@ -0,0 +1,301 @@ + +#line 1 "mpw-shell-commands.rl" +#include +#include +#include +#include + +#include + +typedef std::shared_ptr command_ptr; +typedef std::weak_ptr weak_command_ptr; + +class command { + enum type { + command_if = 1, + command_else, + command_else_if, + command_end + } = 0; + std::string line; + command_ptr next; + weak_command_ptr alternate; // if -> else -> end. +}; + + + +#line 49 "mpw-shell-commands.rl" + + + +int classify(const std::string &line) { + + +#line 35 "mpw-shell-commands.c" +static const int classify_start = 8; +static const int classify_first_final = 8; +static const int classify_error = 0; + +static const int classify_en_main = 8; + + +#line 55 "mpw-shell-commands.rl" + + int cs; + const unsigned char *p = (const unsigned char *)line.data(); + const unsigned char *pe = (const unsigned char *)line.data() + line.size(); + const unsigned char *eof = pe; + const unsigned char *te, *ts; + + +#line 52 "mpw-shell-commands.c" + { + cs = classify_start; + ts = 0; + te = 0; + act = 0; + } + +#line 63 "mpw-shell-commands.rl" + + +#line 63 "mpw-shell-commands.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +tr5: +#line 40 "mpw-shell-commands.rl" + {{p = ((te))-1;}{ return command_else; }} + goto st8; +tr13: +#line 39 "mpw-shell-commands.rl" + { return command_else;} +#line 39 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr14: +#line 39 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr16: +#line 40 "mpw-shell-commands.rl" + {te = p;p--;{ return command_else; }} + goto st8; +tr17: +#line 42 "mpw-shell-commands.rl" + { return command_else_if; } +#line 42 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr18: +#line 42 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr19: +#line 43 "mpw-shell-commands.rl" + {te = p+1;{return command_else_if; }} + goto st8; +tr20: +#line 45 "mpw-shell-commands.rl" + { return command_end; } +#line 45 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr21: +#line 45 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr22: +#line 46 "mpw-shell-commands.rl" + {te = p+1;{return command_end; }} + goto st8; +tr23: +#line 36 "mpw-shell-commands.rl" + { return command_if; } +#line 36 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr24: +#line 36 "mpw-shell-commands.rl" + {te = p;p--;} + goto st8; +tr25: +#line 37 "mpw-shell-commands.rl" + {te = p+1;{return command_if; }} + goto st8; +st8: +#line 1 "NONE" + {ts = 0;} + if ( ++p == pe ) + goto _test_eof8; +case 8: +#line 1 "NONE" + {ts = p;} +#line 137 "mpw-shell-commands.c" + switch( (*p) ) { + case 69u: goto st1; + case 73u: goto st7; + case 101u: goto st1; + case 105u: goto st7; + } + goto st0; +st0: +cs = 0; + goto _out; +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 76u: goto st2; + case 78u: goto st6; + case 108u: goto st2; + case 110u: goto st6; + } + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 83u: goto st3; + case 115u: goto st3; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + switch( (*p) ) { + case 69u: goto st9; + case 101u: goto st9; + } + goto st0; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + switch( (*p) ) { + case 9u: goto tr15; + case 32u: goto tr15; + } + goto tr14; +tr15: +#line 1 "NONE" + {te = p+1;} + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 194 "mpw-shell-commands.c" + switch( (*p) ) { + case 9u: goto st4; + case 32u: goto st4; + case 73u: goto st5; + case 105u: goto st5; + } + goto tr16; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 9u: goto st4; + case 32u: goto st4; + case 73u: goto st5; + case 105u: goto st5; + } + goto tr5; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + switch( (*p) ) { + case 70u: goto st11; + case 102u: goto st11; + } + goto tr5; +st11: + if ( ++p == pe ) + goto _test_eof11; +case 11: + switch( (*p) ) { + case 9u: goto tr19; + case 32u: goto tr19; + } + goto tr18; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 68u: goto st12; + case 100u: goto st12; + } + goto st0; +st12: + if ( ++p == pe ) + goto _test_eof12; +case 12: + switch( (*p) ) { + case 9u: goto tr22; + case 32u: goto tr22; + } + goto tr21; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + switch( (*p) ) { + case 70u: goto st13; + case 102u: goto st13; + } + goto st0; +st13: + if ( ++p == pe ) + goto _test_eof13; +case 13: + switch( (*p) ) { + case 9u: goto tr25; + case 32u: goto tr25; + } + goto tr24; + } + _test_eof8: cs = 8; goto _test_eof; + _test_eof1: cs = 1; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof11: cs = 11; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof12: cs = 12; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof13: cs = 13; goto _test_eof; + + _test_eof: {} + if ( p == eof ) + { + switch ( cs ) { + case 9: goto tr13; + case 10: goto tr16; + case 4: goto tr5; + case 5: goto tr5; + case 11: goto tr17; + case 12: goto tr20; + case 13: goto tr23; + } + } + + _out: {} + } + +#line 65 "mpw-shell-commands.rl" + + return 0; +} diff --git a/mpw-shell-execute.cpp b/mpw-shell-execute.cpp new file mode 100644 index 0000000..d216673 --- /dev/null +++ b/mpw-shell-execute.cpp @@ -0,0 +1,295 @@ +#include "mpw-shell.h" +#include "fdset.h" +#include "value.h" + +#include +#include +#include + +#include +#include + +#include +#include +#include + + +/* + * Relevant shell variables (not currently supported) + * + * Echo {Echo} # control the echoing of commands to diagnostic output + * Echo {Exit} # control script termination based on {Status} + * + */ + +typedef std::vector vs; + +namespace { + + std::string &lowercase(std::string &s) { + std::transform(s.begin(), s.end(), s.begin(), [](char c){ return std::tolower(c); }); + return s; + } + + std::unordered_map &, const fdmask &)> builtins = { + {"directory", builtin_directory}, + {"echo", builtin_echo}, + {"parameters", builtin_parameters}, + {"quote", builtin_quote}, + {"set", builtin_set}, + {"unset", builtin_unset}, + {"export", builtin_export}, + {"unexport", builtin_unexport}, + }; + +} + +typedef std::pair icp; + + + +icp execute_all(command_ptr cmd); + +// returns status and pointer to the next command to execute. +icp execute_if(command_ptr cmd) { + + assert(cmd && cmd->type == command_if); + // evaluate condition... + // skip to else or end. + + command_ptr head(cmd); + + int status = 0; + + // find the end pointer. + // if ... end > file.text + // redirects all output within the block. + + command_ptr end = head; + while (end && end->type != command_end) { + end = end->alternate.lock(); + } + + + fdmask fds; // todo -- inherit from block, can be parsed from end line. + + + + fprintf(stdout, " %s ... %s\n", cmd->string.c_str(), end ? end->string.c_str() : ""); + + // todo -- indent levels. + while(cmd && cmd->type != command_end) { + + int32_t e; + + std::string s = cmd->string; + s = expand_vars(s, Environment); + + auto tokens = tokenize(s, true); + + std::reverse(tokens.begin(), tokens.end()); + e = 0; + status = 0; + switch(cmd->type) { + case command_else_if: + tokens.pop_back(); + case command_if: + tokens.pop_back(); + try { + e = evaluate_expression("If", std::move(tokens)); + } catch (std::exception &ex) { + fprintf(stderr, "%s\n", ex.what()); + status = -5; + } + break; + case command_else: + e = 1; + if (tokens.size() > 1) { + fprintf(stderr, "### Else - Missing if keyword.\n"); + fprintf(stderr, "# Usage - Else [if expression...]\n"); + e = 0; + status = -3; + } + } + + + if (e) { + command_ptr tmp; + std::tie(status, tmp) = execute_all(cmd->next); + break; + } + // skip to next condition. + cmd = cmd->alternate.lock(); + } + + // todo -- print but don't execute remaining alternates + + // print the end tokens... [ doesn't include other tokens.] + fprintf(stdout, " End\n"); + + return std::make_pair(status, end); // return end token -- will advance later. + +} + +int execute_evaluate(command_ptr cmd) { + + fdmask fds; // todo -- inherit from block. + + std::string s = cmd->string; + s = expand_vars(s, Environment); + + + fprintf(stdout, " %s\n", s.c_str()); + + auto tokens = tokenize(s, true); + + return builtin_evaluate(std::move(tokens), fds); +} + + +int execute_external(const std::vector &argv, const fdmask &fds) { + + std::vector cargv; + cargv.reserve(argv.size() + 3); + + int status; + int pid; + + cargv.push_back((char *)"mpw"); + //cargv.push_back((char *)"--shell"); + + unsigned offset = cargv.size(); + + + std::transform(argv.begin(), argv.end(), std::back_inserter(cargv), + [](const std::string &s) { return strdup(s.c_str()); } + ); + + cargv.push_back(nullptr); + + + pid = fork(); + if (pid < 0) { + perror("fork: "); + exit(EX_OSERR); + } + + + if (pid == 0) { + + // also export environment... + + // handle any indirection... + fds.dup(); + + execvp(cargv.front(), cargv.data()); + perror("execvp: "); + exit(EX_OSERR); + } + + std::for_each(cargv.begin()+offset, cargv.end(), free); + + for(;;) { + int status; + pid_t ok; + ok = waitpid(pid, &status, 0); + if (ok < 0) { + if (errno == EINTR) continue; + perror("waitpid:"); + exit(EX_OSERR); + } + + if (WIFEXITED(status)) return WEXITSTATUS(status); + if (WIFSIGNALED(status)) return -1; + fprintf(stderr, "waitpid - unexpected result\n"); + exit(EX_OSERR); + } + +} + +int execute_one(command_ptr cmd) { + + if (!cmd) return 0; + + assert(cmd && cmd->type == 0); + + + // todo -- before variable expansion, + // expand |, ||, && control structures. + // (possibly when classifing.) + + std::string s = cmd->string; + s = expand_vars(s, Environment); + + + fprintf(stdout, " %s\n", s.c_str()); + + auto tokens = tokenize(s); + + process p; + parse_tokens(std::move(tokens), p); + + + + fdmask fds = p.fds.to_mask(); + + std::string name = p.arguments.front(); + lowercase(name); + + auto iter = builtins.find(name); + if (iter != builtins.end()) { + int status = iter->second(p.arguments, fds); + return status; + } + + + return execute_external(p.arguments, fds); + + return 0; +} + +icp execute_all(command_ptr cmd) { + if (!cmd) return std::make_pair(0, cmd); + + int status; + + while(cmd) { + + unsigned type = cmd->type; + switch(type) + { + case command_evaluate: + status = execute_evaluate(cmd); + break; + + default: + status = execute_one(cmd); + break; + + case command_if: + std::tie(status, cmd) = execute_if(cmd); + break; + + case command_end: + case command_else: + case command_else_if: + return std::make_pair(status, cmd); + } + + Environment["status"] = std::to_string(status); + + if (status != 0) { + // only if Environment["Exit"] ? + throw std::runtime_error("### MPW Shell - Execution of input terminated."); + } + cmd = cmd->next; + } + + return std::make_pair(status, cmd); +} + +int execute(command_ptr cmd) { + int status; + std::tie(status, cmd) = execute_all(cmd); + return status; +} diff --git a/mpw-shell-expand.rl b/mpw-shell-expand.rl new file mode 100644 index 0000000..1933436 --- /dev/null +++ b/mpw-shell-expand.rl @@ -0,0 +1,132 @@ + + +#include +#include +#include + +#include + +#include "mpw-shell.h" + + +%%{ + machine line_parser; + alphtype unsigned char; + + escape = 0xb6; + ws = [ \t]; + nl = '\n'; + + action push_back { + line.push_back(fc); + } + action push_back_escape { + line.push_back(escape); + line.push_back(fc); + } + + + sstring = + ['] $push_back + ( (any-nl-[']) $push_back )* + ['] $push_back + $err{ + fprintf(stderr, "### MPW Shell - 's must occur in pairs.\n"); + } + ; + + # same quoting logic as ' string + vstring = + '{' + ( (any-nl-'}') ${var.push_back(fc); } )* + '}' + ${ + if (!var.empty()) { + + // flag to pass through vs "" ? + auto iter = env.find(var); + if (iter == env.end()) { + line.push_back('{'); + line.append(var); + line.push_back('}'); + } + else { + line.append((std::string)iter->second); + } + } + var.clear(); + } + $err{ + fprintf(stderr, "### MPW Shell - {s must occur in pairs.\n"); + } + ; + + + # double-quoted string. + # escape \n is ignored. others do nothing. + dstring = + ["] $push_back + ( + escape ( + nl ${ /* esc newline */ } + | + (any-nl) $push_back_escape + ) + | + vstring + | + (any-escape-nl-["{]) $push_back + )* ["] $push_back + $err{ + fprintf(stderr, "### MPW Shell - \"s must occur in pairs.\n"); + } + ; + + + main := + ( + sstring + | + dstring + | + vstring + | + escape any $push_back_escape + | + (any-['"{]) $push_back + )* + ; + + + + +}%% + + + +%% write data; + + +/* + * has to be done separately since you can do dumb stuff like: + * set q '"' ; echo {q} dsfsdf" + */ + +std::string expand_vars(const std::string &s, const std::unordered_map &env) { + + if (s.find('{') == s.npos) return s; + std::string var; + std::string line; + + int cs; + const unsigned char *p = (const unsigned char *)s.data(); + const unsigned char *pe = (const unsigned char *)s.data() + s.size(); + const unsigned char *eof = pe; + + %%write init; + + %%write exec; + + return line; +} + diff --git a/mpw-shell-parser.cpp b/mpw-shell-parser.cpp new file mode 100644 index 0000000..ccfcf85 --- /dev/null +++ b/mpw-shell-parser.cpp @@ -0,0 +1,406 @@ +#include "mpw-shell.h" +#include "fdset.h" +#include "value.h" + +#include +#include + +/* + * I'm sick of fighting with lemon. Just generate it by hand. + * + */ + + + + + +template +T pop(std::vector &v) { + T t = std::move(v.back()); + v.pop_back(); + return t; +} + +int open(const std::string &name, int flags) { + + // dup2 does not copy the O_CLOEXEC flag so it's safe to use. + + int fd = ::open(name.c_str(), flags | O_CLOEXEC, 0666); + if (fd < 0) { + std::string error = "### MPW Shell - Unable to open "; + error.push_back('"'); + error.append(name); + error.push_back('"'); + error.push_back('.'); + throw std::runtime_error(error); + } + return fd; +} + +void parse_tokens(std::vector &&tokens, process &p) { + + + fdset fds; + std::vector argv; + + std::reverse(tokens.begin(), tokens.end()); + argv.reserve(tokens.size()); + + // first token is always treated as a string. + token t = pop(tokens); + argv.emplace_back(std::move(t.string)); + + while(!tokens.empty()) { + + t = pop(tokens); + + switch (t.type) { + + // >, >> -- redirect stdout. + case '>': + case '>>': + { + int flags; + if (t.type == '>') flags = O_WRONLY | O_CREAT | O_TRUNC; + else flags = O_WRONLY | O_CREAT | O_APPEND; + + if (tokens.empty()) { + throw std::runtime_error("### MPW Shell - Missing file name."); + } + token name = pop(tokens); + int fd = open(name.string, flags); + fds.set(1, fd); + } + break; + + // < -- redirect stdin. + case '<': + { + int flags = O_RDONLY; + + if (tokens.empty()) { + throw std::runtime_error("### MPW Shell - Missing file name."); + } + token name = pop(tokens); + int fd = open(name.string, flags); + fds.set(0, fd); + } + break; + + default: + argv.emplace_back(std::move(t.string)); + break; + } + } + + p.arguments = std::move(argv); + p.fds = std::move(fds); +} + + +class expression_parser { + +public: + + expression_parser(const std::string &n, std::vector &&t) : + name(n), tokens(std::move(t)) + {} + + expression_parser(const expression_parser &) = delete; + expression_parser(expression_parser &&) = delete; + + expression_parser& operator=(const expression_parser &) = delete; + expression_parser& operator=(expression_parser &&) = delete; + + // returns integer value of the expression. + int32_t evaluate(); + + +private: + + value terminal(); + value unary(); + value binary(); + + + value eval(int op, value &lhs, value &rhs); + + [[noreturn]] void expect_binary_operator(); + [[noreturn]] void end_of_expression(); + [[noreturn]] void divide_by_zero(); + + int peek_type() const; + token next(); + static int precedence(int); + + void skip() { + if (!tokens.empty()) tokens.pop_back(); + } + + const std::string &name; + std::vector tokens; +}; + +int expression_parser::peek_type() const { + if (tokens.empty()) return token::eof; + return tokens.back().type; +} + +token expression_parser::next() { + if (tokens.empty()) return token("", token::eof); // error? + return pop(tokens); +} + +void expression_parser::expect_binary_operator() { + token t = next(); + + std::string error; + error = "### " + name; + error += " - Expected a binary operator when \""; + error += t.string; + error += "\" was encountered."; + throw std::runtime_error(error); +} + +void expression_parser::end_of_expression() { + std::string error; + error = "### " + name + " - Unexpected end of expression."; + throw std::runtime_error(error); +} + +void expression_parser::divide_by_zero() { + std::string error; + error = "### " + name + " - Attempt to divide by zero."; + throw std::runtime_error(error); +} + + +value expression_parser::binary() { + + std::vector output; + std::vector> operators; + + value v = unary(); + + output.emplace_back(std::move(v)); + + for(;;) { + + // check for an operator. + + int type = peek_type(); + if (type == token::eof) break; + if (type == ')') break; + + int p = precedence(type); + if (!p) expect_binary_operator(); + skip(); + + while (!operators.empty() && operators.back().second <= p) { + // reduce top ops. + int op = operators.back().first; + operators.pop_back(); + value rhs = pop(output); + value lhs = pop(output); + + output.emplace_back(eval(op, lhs, rhs)); + } + + + operators.push_back(std::make_pair(type, p)); + + v = unary(); + + output.emplace_back(std::move(v)); + + } + + // reduce... + while (!operators.empty()) { + + int op = pop(operators).first; + value rhs = pop(output); + value lhs = pop(output); + + output.emplace_back(eval(op, lhs, rhs)); + } + + if (output.size() != 1) throw std::runtime_error("binary stack error"); + return pop(output); +} + +int expression_parser::precedence(int op) { + switch (op) { + + case '*': + case '%': + case '/': + return 3; + + case '+': + case '-': + return 4; + + case '>>': + case '<<': + return 5; + + case '<': + case '<=': + case '>': + case '>=': + return 6; + + case '==': + case '!=': + case token::equivalent: + case token::not_equivalent: + return 7; + case '&': + return 8; + case '^': + return 9; + case '|': + return 10; + case '&&': + return 11; + case '||': + return 12; + } + return 0; + //throw std::runtime_error("unimplemented op";); +} + +value expression_parser::eval(int op, value &lhs, value &rhs) { + switch (op) { + + case '*': + return lhs.to_number() * rhs.to_number(); + + case '/': + if (!rhs.to_number()) divide_by_zero(); + return lhs.to_number() / rhs.to_number(); + + case '%': + if (!rhs.to_number()) divide_by_zero(); + return lhs.to_number() % rhs.to_number(); + + + case '+': + return lhs.to_number() + rhs.to_number(); + case '-': + return lhs.to_number() - rhs.to_number(); + case '>': + return lhs.to_number() > rhs.to_number(); + case '<': + return lhs.to_number() < rhs.to_number(); + + case '<=': + return lhs.to_number() <= rhs.to_number(); + + case '>=': + return lhs.to_number() >= rhs.to_number(); + + case '>>': + return lhs.to_number() >> rhs.to_number(); + + case '<<': + return lhs.to_number() >> rhs.to_number(); + + // logical || . NaN ok + case '||': + return lhs.to_number(1) || rhs.to_number(1); + + // logical && . NaN ok + case '&&': + return lhs.to_number(1) && rhs.to_number(1); + + case '|': + return lhs.to_number() | rhs.to_number(); + + case '&': + return lhs.to_number() & rhs.to_number(); + + case '^': + return lhs.to_number() ^ rhs.to_number(); + + case '==': + // string ==. 0x00==0 -> 0 + // as a special case, 0=="". go figure. + if (lhs.string == "" && rhs.string == "0") return 1; + if (lhs.string == "0" && rhs.string == "") return 1; + return lhs.string == rhs.string; + + case '!=': + if (lhs.string == "" && rhs.string == "0") return 0; + if (lhs.string == "0" && rhs.string == "") return 0; + return lhs.string != rhs.string; + + + } + // todo... + throw std::runtime_error("unimplemented op"); +} + +value expression_parser::unary() { + + int type = peek_type(); + + switch (type) { + case '-': + case '+': + case '!': + case '~': + next(); + value v = unary(); + // + is a nop.. doesn't even check if it's a number. + if (type == '-') v = -v.to_number(); + if (type == '~') v = ~v.to_number(); + if (type == '!') v = !v.to_number(1); // logical !, NaN ok. + + return v; + } + + return terminal(); +} + +value expression_parser::terminal() { + + int type = peek_type(); + + if (type == token::text) { + token t = next(); + return value(std::move(t.string)); + } + + if (type == '(') { + next(); + value v = binary(); + type = peek_type(); + if (type != ')') { + end_of_expression(); + } + next(); + return v; + } + // insert a fake token. + return value(); +} + +int32_t expression_parser::evaluate() { + if (tokens.empty()) return 0; + + value v = binary(); + if (!tokens.empty()) { + if (tokens.back().type == ')') + throw std::runtime_error("### MPW Shell - Extra ) command."); + throw std::runtime_error("evaluation stack error."); // ?? should be caught above. + } + return v.to_number(1); +} + +int32_t evaluate_expression(const std::string &name, std::vector &&tokens) { + + expression_parser p(name, std::move(tokens)); + return p.evaluate(); +} diff --git a/mpw-shell-quote.rl b/mpw-shell-quote.rl new file mode 100644 index 0000000..602175f --- /dev/null +++ b/mpw-shell-quote.rl @@ -0,0 +1,75 @@ +#include + +bool must_quote(const std::string &s){ +%%{ + + machine must_quote; + alphtype unsigned char; + + quotable = ( + [ \t\r\n] + | + 0x00 + | + [0x80-0xff] + | + [+#;&|()'"/\\{}`?*<>] + | + '-' + | + '[' + | + ']' + ); + + #simpler just to say what's ok. + normal = [A-Za-z0-9_.:]; + + main := + ( + normal + | + (any-normal) ${return true;} + )* + ; +}%% + + %%write data; + + int cs; + const unsigned char *p = (const unsigned char *)s.data(); + const unsigned char *pe = (const unsigned char *)s.data() + s.size(); + const unsigned char *eof = nullptr; + + %%write init; + %%write exec; + return false; +} + +#if 0 +std::string quote(const std::string &s) { + std::string tmp(s); + return quote(std::move(tmp)); +} +#endif + +std::string quote(const std::string &s) { + const char q = '\''; + const char *escape_q = "'\xd8''"; + + if (!must_quote(s)) return s; + + std::string out; + out.reserve(s.length() + (s.length() >> 1)); + out.push_back(q); + + for (char c : s) { + if (c == q) { + out.append(escape_q); + } else + out.push_back(c); + } + + out.push_back(q); + return out; +} diff --git a/mpw-shell-read.rl b/mpw-shell-read.rl new file mode 100644 index 0000000..fe0c637 --- /dev/null +++ b/mpw-shell-read.rl @@ -0,0 +1,374 @@ +#include "mpw-shell.h" + +#include +#include + + +%%{ + machine classify; + alphtype unsigned char; + + ws = [ \t]; + + IF = /if/i; + ELSE = /else/i; + END = /end/i; + BEGIN = /begin/i; + EVALUATE = /evaluate/i; + + + main := |* + IF %eof{ return command_if; }; + IF ws => {return command_if; }; + + ELSE %eof{ return command_else;}; + ELSE ws => { return command_else; }; + + ELSE ws+ IF %eof{ return command_else_if; }; + ELSE ws+ IF ws => {return command_else_if; }; + + END %eof{ return command_end; }; + END ws => {return command_end; }; + + EVALUATE %eof{ return command_evaluate; }; + EVALUATE ws => {return command_evaluate; }; + + + *|; + +}%% + + +static int classify(const std::string &line) { + + %%machine classify; + %% write data; + + int cs; + int act; + + const unsigned char *p = (const unsigned char *)line.data(); + const unsigned char *pe = (const unsigned char *)line.data() + line.size(); + const unsigned char *eof = pe; + const unsigned char *te, *ts; + + %%write init; + + %%write exec; + + return 0; +} + + +/* + * this state machine splits input into lines. + * only new-line escapes are removed. + * "", '', and {} are also matched. + * + */ + +/* + * from experimentation, mpw splits on ; after variable expansion; + * this splits before. something stupid like: + * set q '"'; echo {q} ; " + * will not be handled correctly. oh well. + * (should probably just drop that and we can then combine tokenizing w/ + * variable expansion) + */ +%%{ + machine line_parser; + alphtype unsigned char; + + + escape = 0xb6; + ws = [ \t]; + nl = ('\n' | '\r'); + + action add_line { + /* strip trailing ws */ + while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back(); + if (!scratch.empty()) { + command_ptr cmd = std::make_shared(std::move(scratch)); + cmd->line = start_line; + start_line = line; + program.emplace_back(std::move(cmd)); + } + scratch.clear(); + fgoto main; + } + + action push_back { + scratch.push_back(fc); + } + + action push_back_escape { + scratch.push_back(escape); + scratch.push_back(fc); + } + + comment = '#' (any-nl)*; + + escape_seq = + escape + ( + nl ${ /* esc newline */ line++; } + | + (any-nl) $push_back_escape + ) + ; + + + # single-quoted string. only escape \n is special. + # handling is so stupid I'm not going to support it. + + sstring = + ['] $push_back + ( (any-nl-[']) $push_back )* + ['] $push_back + $err{ + throw std::runtime_error("### MPW Shell - 's must occur in pairs."); + } + ; + + # same quoting logic as ' string + vstring = + '{' $push_back + ( (any-nl-'}') $push_back )* + '}' $push_back + $err{ + throw std::runtime_error("### MPW Shell - {s must occur in pairs."); + } + ; + + + # double-quoted string. + # escape \n is ignored. others do nothing. + dstring = + ["] $push_back + ( + escape_seq + | + vstring + | + (any-escape-nl-["{]) $push_back + )* ["] $push_back + $err{ + throw std::runtime_error("### MPW Shell - \"s must occur in pairs."); + } + ; + + # this is a mess ... + coalesce_ws = + ws + ( + ws + | + escape nl ${ line++; } + )* + <: + any ${ scratch.push_back(' '); fhold; } + ; + + line := + ( + sstring + | + dstring + | + vstring + | + [;] $add_line + | + escape_seq + | + coalesce_ws + | + (any-escape-nl-ws-[;#'"{]) $push_back + )* + comment? + nl ${ line++; } $add_line + ; + + main := + # strip leading whitespace. + ws* + <: # left guard -- higher priority to ws. + any ${ fhold; fgoto line; } + ; + +}%% + + + + + + +class line_parser { + + public: + + void process(const void *data, size_t size) { + process((const unsigned char *)data, size, false); + } + + command_ptr finish() { + process((const unsigned char *)"\n\n", 2, true); + return build_program(); + } + + line_parser(); + + private: + + %% machine line_parser; + %% write data; + + + std::vector program; + std::string scratch; + int line = 1; + int cs; + + command_ptr build_program(); + void process(const unsigned char *data, size_t size, bool final); +}; + +line_parser::line_parser() { + %% machine line_parser; + %% write init; +} + +void line_parser::process(const unsigned char *data, size_t size, bool final) { + + int start_line; + + const unsigned char *p = data; + const unsigned char *pe = data + size; + const unsigned char *eof = nullptr; + + if (final) + eof = pe; + + start_line = line; + %% machine line_parser; + %% write exec; + + if (cs == line_parser_error) { + throw std::runtime_error("MPW Shell - Lexer error."); + + } + + if (cs != line_parser_start && final) { + // will this happen? + throw std::runtime_error("MPW Shell - Lexer error."); + } +} + + +/* + * Generates a linked-list of commands. Why? Because it also checks + * for shell-special syntax (currently if / else /end only) and + * adds pointers to make executing them easier. + * + */ + +// todo -- use recursive descent parser, support begin/end, (), ||, &&, etc. +command_ptr line_parser::build_program() { + + + std::vector if_stack; + + command_ptr head; + command_ptr ptr; + + if (program.empty()) return head; + + std::reverse(program.begin(), program.end()); + + head = program.back(); + + while (!program.empty()) { + + if (ptr) ptr->next = program.back(); + + ptr = std::move(program.back()); + program.pop_back(); + + int type = ptr->type = classify(ptr->string); + + ptr->level = if_stack.size(); + + // if stack... + switch (type) { + default: + break; + + case command_if: + if_stack.push_back(ptr); + break; + + case command_else: + case command_else_if: + + if (if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Else must be within if ... end."); + } + + ptr->level--; + if_stack.back()->alternate = ptr; + if_stack.back() = ptr; + break; + + case command_end: + if (if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Extra end command."); + } + + ptr->level--; + if_stack.back()->alternate = ptr; + if_stack.pop_back(); + break; + } + } + + if (!if_stack.empty()) { + throw std::runtime_error("### MPW Shell - Unterminated if command."); + } + + return head; +} + + +command_ptr read_fd(int fd) { + unsigned char buffer[1024]; + + line_parser p; + + for(;;) { + ssize_t s = read(fd, buffer, sizeof(buffer)); + if (s < 0) { + throw std::runtime_error("MPW Shell - Read error."); + } + p.process(buffer, s); + } + return p.finish(); +} + +command_ptr read_file(const std::string &name) { + int fd; + fd = open(name.c_str(), O_RDONLY); + if (fd < 0) { + throw std::runtime_error("MPW Shell - Unable to open file " + name + "."); + } + + auto tmp = read_fd(fd); + close(fd); + return tmp; +} + +command_ptr read_string(const std::string &s) { + line_parser p; + + p.process(s.data(), s.size()); + return p.finish(); +} diff --git a/mpw-shell-token.rl b/mpw-shell-token.rl new file mode 100644 index 0000000..4fa5706 --- /dev/null +++ b/mpw-shell-token.rl @@ -0,0 +1,258 @@ +#include +#include +#include + +#include "mpw-shell.h" + +%%{ + machine tokenizer; + alphtype unsigned char; + + + escape = 0xb6; + ws = [ \t]; + nl = '\n' | '\r'; + + action push_token { + if (!scratch.empty()) { + tokens.emplace_back(std::move(scratch)); + scratch.clear(); + } + } + + action push_back { + scratch.push_back(fc); + } + +# vstring_quoted = +# [{] +# ( (any-nl-[}]) ${ var.push_back(fc); } )* +# [}] +# %{ +# auto iter = Environment.find(var); +# if (iter != Environment.end() { +# scratch.append(iter->second); +# }) +# var.clear(); +# } +# $err{ +# throw std::runtime_error("### MPW Shell - '{ must occur in pairs."); +# } +# ; + +# vstring_unqoted = +# [{] +# ( (any-nl-[}]) ${ var.push_back(fc); } )* +# [}] +# %{ +# auto iter = Environment.find(var); +# if (iter != Environment.end() { +# // re-parse. ", ', { are not +# // special. all others are treated normally. +# }) +# var.clear(); +# } +# $err{ +# throw std::runtime_error("### MPW Shell - '{ must occur in pairs."); +# } +# ; + + sstring = + ['] + ( (any-nl-[']) $push_back )* + ['] + $err{ + throw std::runtime_error("### MPW Shell - 's must occur in pairs."); + } + ; + + escape_seq = + escape + ( + 'f' ${scratch.push_back('\f'); } + | + 'n' ${scratch.push_back('\n'); /* \r ? */ } + | + 't' ${scratch.push_back('\t'); } + | + any-[fnt] $push_back + ) + ; + + # double-quoted string. + dstring = + ["] + ( + escape_seq + | + (any-escape-["]) $push_back + )* + ["] + $err{ + throw std::runtime_error("### MPW Shell - \"s must occur in pairs."); + } + ; + + + action eval { eval } + + # > == start state (single char tokens or common prefix) + # % == final state (multi char tokens w/ unique prefix) + # $ == all states + + main := |* + ws+ >push_token; + '>>' %push_token => { tokens.emplace_back(">>", '>>'); }; + '>' %push_token => { tokens.emplace_back(">", '>'); }; + + '<' %push_token => { tokens.emplace_back("<", '<'); }; + + '||' %push_token => { tokens.emplace_back("||", '||'); }; + '|' %push_token => { tokens.emplace_back("|", '|'); }; + + '&&' + %push_token => { tokens.emplace_back("&&", '&&'); }; + + # eval-only. + + '(' when eval + %push_token => { tokens.emplace_back("(", '('); }; + + ')' when eval + %push_token => { tokens.emplace_back(")", ')'); }; + + + '<<' when eval + %push_token => { tokens.emplace_back("<<", '<<'); }; + + '<=' when eval + %push_token => { tokens.emplace_back("<=", '<='); }; + + '>=' when eval + %push_token => { tokens.emplace_back(">=", '>='); }; + + '==' when eval + %push_token => { tokens.emplace_back("==", '=='); }; + + '!=' when eval + %push_token => { tokens.emplace_back("!=", '!='); }; + + '&' when eval + %push_token => { tokens.emplace_back("&", '&'); }; + + '+' when eval + >push_token => { tokens.emplace_back("+", '+'); }; + + '*' when eval + %push_token => { tokens.emplace_back("*", '*'); }; + + '%' when eval + %push_token => { tokens.emplace_back("%", '%'); }; + + + '-' when eval + %push_token => { tokens.emplace_back("+", '-'); }; + + '!' when eval + %push_token => { tokens.emplace_back("!", '!'); }; + + '^' when eval + %push_token => { tokens.emplace_back("^", '^'); }; + + '~' when eval + %push_token => { tokens.emplace_back("~", '~'); }; + + + '=' when eval + %push_token => { tokens.emplace_back("=", '='); }; + + '+=' when eval + %push_token => { tokens.emplace_back("+=", '+='); }; + + '-=' when eval + %push_token => { tokens.emplace_back("-=", '-='); }; + + + sstring ; + dstring ; + escape_seq; + + (any-escape-['"]) => push_back; # { scratch.append(ts, te); }; + #(any-escape-ws-[>'"])+ => { scratch.append(ts, te); }; + *| + ; +}%% + + + +inline void replace_eval_token(token &t) { + +%%{ + + machine eval_keywords; + + main := + /and/i %{ t.type = '&&'; } + | + /or/i %{ t.type = '||'; } + | + /not/i %{ t.type = '!'; } + | + /div/i %{ t.type = '/'; } + | + /mod/i %{ t.type = '%'; } + ; +}%% + + + %%machine eval_keywords; + %%write data; + + + const char *p = t.string.data(); + const char *pe = t.string.data() + t.string.size(); + const char *eof = pe; + int cs; + %%write init; + + %%write exec; +} +std::vector tokenize(const std::string &s, bool eval) +{ + std::vector tokens; + std::string scratch; + + %%machine tokenizer; + %% write data; + + int cs, act; + unsigned const char *p = (const unsigned char *)s.data(); + unsigned const char *pe = (const unsigned char *)s.data() + s.size(); + unsigned const char *eof = pe; + + unsigned const char *ts, *te; + + %%write init; + + %%write exec; + + if (cs == tokenizer_error) { + throw std::runtime_error("MPW Shell - Lexer error."); + } + + if (!scratch.empty()) { + tokens.emplace_back(std::move(scratch)); + scratch.clear(); + } + + // alternate operator tokens for eval + if (eval) { + + for (token & t : tokens) { + if (t.type == token::text) replace_eval_token(t); + + } + } + + return tokens; +} diff --git a/mpw-shell.cpp b/mpw-shell.cpp new file mode 100644 index 0000000..3881daf --- /dev/null +++ b/mpw-shell.cpp @@ -0,0 +1,50 @@ + +#include +#include +#include + +#include +#include +#include +#include + +#include "mpw-shell.h" + + + + +std::unordered_map Environment; + + + + +// should set {MPW}, {MPWVersion}, then execute {MPW}StartUp +void init(void) { + Environment.emplace("status", std::string("0")); + Environment.emplace("exit", std::string("1")); // terminate script on error. +} + +int main(int argc, char **argv) { + + init(); + + command_ptr head; + + + try { + head = read_fd(0); + } catch (std::exception &ex) { + fprintf(stderr, "%s\n", ex.what()); + exit(1); + } + + + try { + int status = execute(head); + exit(status); + } catch(std::exception &ex) { + fprintf(stderr, "%s\n", ex.what()); + exit(1); + } + +} diff --git a/mpw-shell.h b/mpw-shell.h new file mode 100644 index 0000000..48f662b --- /dev/null +++ b/mpw-shell.h @@ -0,0 +1,159 @@ +#ifndef __mpw_shell_h__ +#define __mpw_shell_h__ + +#include +#include +#include +#include +#include + +class command; +typedef std::shared_ptr command_ptr; +typedef std::weak_ptr weak_command_ptr; + +const unsigned char escape = 0xb6; + +// environment has a bool which indicates if exported. +struct EnvironmentEntry { +public: + operator bool() const { return exported; } + operator bool&() { return exported; } + + operator const std::string&() const { return value; } + operator std::string&() { return value; } + + EnvironmentEntry() = default; + EnvironmentEntry(const EnvironmentEntry &) = default; + EnvironmentEntry(EnvironmentEntry &&) = default; + + EnvironmentEntry(const std::string &s, bool e = false) : value(s), exported(e) + {} + EnvironmentEntry(std::string &&s, bool e = false) : value(std::move(s)), exported(e) + {} + + ~EnvironmentEntry() = default; + + EnvironmentEntry& operator=(bool &rhs) { exported = rhs; return *this; } + EnvironmentEntry& operator=(const std::string &rhs) { value = rhs; return *this; } + EnvironmentEntry& operator=(const EnvironmentEntry &) = default; + EnvironmentEntry& operator=(EnvironmentEntry &&) = default; + +private: + std::string value; + bool exported = false; + +}; + +extern std::unordered_map Environment; + +enum { + command_if = 1, + command_else, + command_else_if, + command_end, + command_begin, + command_evaluate, +}; + +class command { + public: + unsigned type = 0; + unsigned line = 0; + unsigned level = 0; + + std::string string; + command_ptr next; + weak_command_ptr alternate; // if -> else -> end. weak to prevent cycles. + + command() = default; + command(command &&) = default; + command(const command &) = default; + + command(unsigned t, const std::string &s) : + type(t), string(s) + {} + + command(unsigned t, std::string &&s) : + type(t), string(std::move(s)) + {} + + command(const std::string &s) : string(s) + {} + + command(std::string &&s) : string(std::move(s)) + {} + + +}; + + +class token { +public: + enum { + text = 0, + eof, + equivalent, + not_equivalent, + // remainder are characters. + + }; + unsigned type = text; + std::string string; + + token() = default; + token(token &&) = default; + token(const token&) = default; + + token &operator=(token &&) = default; + token &operator=(const token &) = default; + + token(const std::string &s, unsigned t = text) : + type(t), string(s) + {} + + token(std::string &&s, unsigned t = text) : + type(t), string(std::move(s)) + {} + + operator std::string() const { + return string; + } + +}; + + +command_ptr read_fd(int fd); +command_ptr read_file(const std::string &); +command_ptr read_string(const std::string &); + +std::vector tokenize(const std::string &s, bool eval = false); +std::string expand_vars(const std::string &s, const std::unordered_map &env); + +//std::string quote(std::string &&s); +std::string quote(const std::string &s); + + +struct process; +struct value; +class fdmask; + +void parse_tokens(std::vector &&tokens, process &p); + + +int execute(command_ptr cmd); + +int32_t evaluate_expression(const std::string &name, std::vector &&tokens); + +int builtin_directory(const std::vector &, const fdmask &); +int builtin_echo(const std::vector &, const fdmask &); +int builtin_parameters(const std::vector &, const fdmask &); +int builtin_quote(const std::vector &tokens, const fdmask &); +int builtin_set(const std::vector &, const fdmask &); +int builtin_unset(const std::vector &, const fdmask &); +int builtin_export(const std::vector &, const fdmask &); +int builtin_unexport(const std::vector &, const fdmask &); + +int builtin_evaluate(std::vector &&, const fdmask &); + + +#endif diff --git a/mpw-shell.text b/mpw-shell.text new file mode 100644 index 0000000..740cb21 --- /dev/null +++ b/mpw-shell.text @@ -0,0 +1,21 @@ + + +parser differences. + +I've tried to follow mpw's command line parsing algorithm but there are some differences. + +Mostly this is because + +set q '"' ; echo {q} really " + +is equivalent to + +echo " really " + +mpw removes # comments before shell expansion so this _is_ an error: + +echo {q} # " + +- mpw doesn't split on ; until after variables are expanded. I split before variable expansion. +- escape new-line is not allowed in a '' or {} string. +- quote matching happens when the line is read. diff --git a/value.h b/value.h new file mode 100644 index 0000000..1d3b3fc --- /dev/null +++ b/value.h @@ -0,0 +1,73 @@ + +#ifndef __value_h__ +#define __value_h__ + +#include +#include + +// hold a string and number value. + +struct value { + +public: + + std::string string; + int32_t number = 0; + + // empty token treated as 0. + value() : status(valid) + {} + + value(const value &) = default; + value(value &&) = default; + + value(int32_t n) : + string(std::to_string(n)), + number(n), + status(valid) + {} + + value(const std::string &s) : string(s) + {} + + value(std::string &&s) : string(std::move(s)) + {} + + value &operator=(const value&) = default; + value &operator=(value &&) = default; + + + int32_t to_number() { + if (status == unknown) + scan_number(); + if (status == valid) return number; + expect_number(); + } + + int32_t to_number(int default_value) noexcept { + if (status == unknown) + scan_number(); + if (status == valid) return number; + return default_value; + } + + bool is_number() noexcept { + if (status == unknown) + scan_number(); + return status == valid; + } + + +private: + [[noreturn]] void expect_number() const; + void scan_number() noexcept; + + mutable enum { + unknown, + valid, + invalid + } status = unknown; + +}; + +#endif diff --git a/value.rl b/value.rl new file mode 100644 index 0000000..056ec79 --- /dev/null +++ b/value.rl @@ -0,0 +1,76 @@ + +#include "value.h" +#include + +void value::expect_number() const { + + std::string error; + + error = "Expected a number when \""; + error += string; + error += "\" was encountered"; + + throw std::domain_error(error); +} + +void value::scan_number(void) noexcept { + +%%{ + machine scanner; + hexnumber = + ('$' | '0x' | '0X') + ( + [0-9] ${ value = (value << 4) + fc - '0'; } + | + [A-Fa-f] ${value = (value << 4) + (fc | 0x20) - 'a' + 10; } + )+ + ; + + binnumber = + ('0b' | '0B') + [01]+ ${ value = (value << 1) + fc - '0'; } + ; + + octalnumber = + '0' + [0-7]+ ${ value = (value << 3) + fc - '0'; } + ; + + # a leading 0 is ambiguous since it could also + # be part of the binary or hex prefix. + # however, setting it to 0 is safe. + decnumber = + '0' + | + ([1-9] [0-9]*) ${ value = value * 10 + fc - '0'; } + ; + + + main := + ( hexnumber | decnumber |binnumber) + %{ + status = valid; + number = value; + return; + } + ; +}%% + + if (string.empty()) { + // special case. + status = valid; + number = 0; + return; + } + const char *p = string.data(); + const char *pe = string.data() + string.size(); + const char *eof = pe; + int cs; + int32_t value = 0; + + %%write data; + %%write init; + %%write exec; + + status = invalid; +}