initial version

This commit is contained in:
Kelvin Sherlock 2016-01-27 10:43:34 -05:00
commit 7034e1193e
18 changed files with 3101 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
*.o
build/

51
CMakeLists.txt Normal file
View File

@ -0,0 +1,51 @@
# Build script for the mpw-shell executable.
#
# cmake_minimum_required() has to run before project() so that policies are
# established before the language/toolchain is configured.
cmake_minimum_required(VERSION 2.6)

# The compiler and flags are forced here, before project() probes the toolchain.
set(CMAKE_CXX_COMPILER "clang++")
set(CMAKE_CXX_FLAGS "-std=c++14 -stdlib=libc++ -g -Wall -Wno-unused-const-variable -Wno-unused-variable -Wno-multichar -Wno-c++11-extensions")

project("mpw-shell")

# Generated sources live in the build tree but include headers from the source tree.
include_directories(${CMAKE_SOURCE_DIR})

# Each Ragel grammar <name>.rl is compiled to <name>.cpp with the same options.
# NOTE(review): mpw-shell-command.cpp is generated here but is not listed in
# add_executable() below -- confirm whether that is intentional.
foreach(RAGEL_SOURCE
    mpw-shell-read
    mpw-shell-expand
    mpw-shell-token
    mpw-shell-command
    value
    mpw-shell-quote
)
  add_custom_command(
    OUTPUT ${RAGEL_SOURCE}.cpp
    COMMAND ragel -p -G2 -o ${RAGEL_SOURCE}.cpp "${CMAKE_CURRENT_SOURCE_DIR}/${RAGEL_SOURCE}.rl"
    MAIN_DEPENDENCY ${RAGEL_SOURCE}.rl
  )
endforeach()

add_executable(mpw-shell mpw-shell.cpp mpw-shell-read.cpp mpw-shell-token.cpp mpw-shell-expand.cpp
  mpw-shell-execute.cpp mpw-shell-builtins.cpp mpw-shell-parser.cpp value.cpp mpw-shell-quote.cpp)

44
command.h Normal file
View File

@ -0,0 +1,44 @@
#include <memory>
#include <vector>
#include <array>
// Owning handle to a (polymorphic) command node.
using command_ptr = std::unique_ptr<struct command>;
// Ordered list of owned commands.
using command_ptr_vector = std::vector<command_ptr>;
// Exactly two owned commands (the operands of a binary command).
using command_ptr_pair = std::array<command_ptr, 2>;
// Polymorphic base for all command nodes.
struct command {
    // Command kinds; no enumerators are defined yet.
    enum {
    };

    // Kind of this command. Initialized so a freshly constructed node never
    // carries an indeterminate value.
    int type = 0;

    virtual ~command();

    // Runs the command and returns an int result.
    // NOTE(review): the definition is not in this header; presumably the
    // result is the command's exit status -- confirm.
    virtual int run();
};
// A command that carries a single string of text.
struct simple_command : command {
    std::string text;
};
// A command that owns exactly two child commands.
struct binary_command : command {
    command_ptr_pair children;
};
// Binary command distinguished from and_command only by its type.
struct or_command : binary_command {
};
// Binary command distinguished from or_command only by its type.
struct and_command : binary_command {
};
// A block of child commands plus the text of its `end` line.
struct begin_command : command {
    command_ptr_vector children;
    std::string end;
};
// An if block: the text of the opening line, the commands of the main
// clause, the commands of the else clause, and the text of the `end` line.
struct if_command : command {
    std::string begin;
    command_ptr_vector children;
    command_ptr_vector else_clause;
    std::string end;
};

155
fdset.h Normal file
View File

@ -0,0 +1,155 @@
#ifndef __fdset__
#define __fdset__
#include <array>
#include <initializer_list>
#include <string>
#include <vector>
#include <unistd.h>
class fdset;
class fdmask;
/*
 * fdmask holds three file descriptors (slots 0, 1, 2 correspond to
 * stdin, stdout, stderr); -1 means "slot not set".
 *
 * fdmask does not own the file descriptors and will not close them.
 */
class fdmask {
public:

    fdmask() = default;
    fdmask(const fdmask &) = default;
    fdmask(fdmask &&) = default;

    fdmask(const std::array<int, 3> &rhs) : _fds(rhs)
    {}

#if 0
    fdmask(std::initializer_list<int> rhs) : _fds(rhs)
    {}
#endif

    fdmask &operator=(const fdmask &) = default;
    fdmask &operator=(fdmask &&) = default;

    fdmask &operator=(const std::array<int, 3> &rhs) {
        _fds = rhs;
        return *this;
    }

#if 0
    fdmask &operator=(std::initializer_list<int> rhs) {
        _fds = rhs;
    }
#endif

    // dup fds to stdin/stdout/stderr.
    // called after fork, before exec.
    // Slots that are unset (-1) or already equal to their target are skipped.
    void dup() const {
        const int targets[3] = { STDIN_FILENO, STDOUT_FILENO, STDERR_FILENO };
        for (unsigned i = 0; i < 3; ++i) {
            if (_fds[i] >= 0 && _fds[i] != targets[i]) dup2(_fds[i], targets[i]);
        }
    }

    // Returns the descriptor in slot `index` (0..2); no bounds check is performed.
    int operator[](unsigned index) const {
        return _fds[index];
    }

    // Fills every unset slot of this mask from rhs; slots already set win.
    fdmask &operator|=(const fdmask &rhs) {
        for (unsigned i = 0; i < 3; ++i) {
            if (_fds[i] < 0) _fds[i] = rhs._fds[i];
        }
        return *this;
    }

private:
    friend class fdset;

    std::array<int, 3> _fds = {{ -1, -1, -1 }};
};
/*
 * fdset owns its descriptors and will close them.
 *
 * Move-only: ownership can be transferred but never shared.
 */
class fdset {
public:

    fdset() = default;
    fdset(const fdset &) = delete;
    fdset &operator=(const fdset &) = delete;

    // Takes over other's descriptors; other is left holding this object's
    // freshly initialized (-1) slots.
    fdset(fdset &&other) {
        std::swap(other._fds, _fds);
    }

    ~fdset() {
        close();
    }

    // Exchanges descriptors with other, then closes the ones that used to
    // belong to this object.
    fdset &operator=(fdset &&other) {
        if (this == &other) return *this;
        std::swap(_fds, other._fds);
        other.close();
        return *this;
    }

    // Closes every descriptor that is set and marks its slot unset.
    void close(void) {
        for (unsigned i = 0; i < _fds.size(); ++i) {
            if (_fds[i] < 0) continue;
            ::close(_fds[i]);
            _fds[i] = -1;
        }
    }

    // Stores fd in slot `index`, closing whatever descriptor was there before.
    void set(int index, int fd) {
        const int previous = _fds[index];
        _fds[index] = fd;
        if (previous >= 0) ::close(previous);
    }

    // Non-owning snapshot of the current descriptors.
    fdmask to_mask() const {
        return fdmask(_fds);
    }

private:

    // Forgets all descriptors without closing them.
    void reset() {
        _fds.fill(-1);
    }

    std::array<int, 3> _fds = {{ -1, -1, -1 }};
};
// Combines two masks; descriptors set in lhs take priority over rhs.
inline fdmask operator|(const fdmask &lhs, const fdmask &rhs) {
    return fdmask(lhs) |= rhs;
}
// Combines an owning set with a mask; descriptors set in lhs take priority.
// The result is non-owning.
inline fdmask operator|(const fdset &lhs, const fdmask &rhs) {
    return lhs.to_mask() |= rhs;
}
// A parsed command: its argument strings and the descriptors it owns.
struct process {
std::vector<std::string> arguments;
// Owned descriptors; they are closed when the process object is destroyed.
fdset fds;
};
#endif

506
mpw-shell-builtins.cpp Normal file
View File

@ -0,0 +1,506 @@
#include "mpw-shell.h"
#include "fdset.h"
#include "value.h"
#include <string>
#include <vector>
#include <algorithm>
#include <cstdio>
#include <cctype>
namespace {
// Lowercases s in place and returns a reference to it.
// Bytes are passed to std::tolower as unsigned char: calling it with a
// negative char value (any byte >= 0x80 where char is signed) is undefined.
std::string &lowercase(std::string &s) {
    std::transform(s.begin(), s.end(), s.begin(),
        [](unsigned char c){ return static_cast<char>(std::tolower(c)); });
    return s;
}
// Splits argv (skipping argv[0]) into flags and plain arguments.
//
// Every character after the leading '-' of a flag word is passed to fx;
// empty words are dropped; all other words are returned in order.
// doesn't handle flag arguments but builtins don't have arguments.
// An empty argv yields an empty result.
template<class FX>
std::vector<std::string> getopt(const std::vector<std::string> &argv, FX fx) {

    std::vector<std::string> out;
    if (argv.empty()) return out;

    out.reserve(argv.size() - 1);
    for (auto iter = argv.begin() + 1; iter != argv.end(); ++iter) {
        const std::string &s = *iter;
        if (s.empty()) continue; // ?
        if (s.front() == '-') {
            for (auto c = s.begin() + 1; c != s.end(); ++c) fx(*c);
            continue;
        }
        out.push_back(s);
    }
    return out;
}
/*
* the fdopen() will assume ownership of the fd and close it.
* this is not desirable.
*/
// Read callback: the cookie carries the file descriptor itself, not a pointer.
int readfn(void *cookie, char *buffer, int size) {
    const int fd = static_cast<int>(reinterpret_cast<ptrdiff_t>(cookie));
    return ::read(fd, buffer, size);
}
// Write callback: the cookie carries the file descriptor itself, not a pointer.
int writefn(void *cookie, const char *buffer, int size) {
    const int fd = static_cast<int>(reinterpret_cast<ptrdiff_t>(cookie));
    return ::write(fd, buffer, size);
}
// Returns a FILE* for slot `index` (0 = in, 1 = out, 2 = err).
// With a valid fd a new stream is created over it via funopen() with no
// close callback, so closing the stream will not close the descriptor.
// Without one (fd < 0) the matching standard stream is returned; any index
// other than 0 or 1 maps to stderr.
FILE *file_stream(int index, int fd) {

    if (fd >= 0) {
        // will not close.
        return funopen((const void *)(ptrdiff_t)fd, readfn, writefn, nullptr, nullptr);
    }

    if (index == 0) return stdin;
    if (index == 1) return stdout;
    return stderr;
}
// Scoped set of in/out/err streams for a builtin.
// Each stream is either a temporary stream over the descriptor in the given
// fdmask or, when that slot is unset, the process's own standard stream.
// Temporary streams are closed on destruction; standard streams never are.
class io_helper {

public:

    FILE *in;
    FILE *out;
    FILE *err;

    io_helper(const fdmask &fds) {
        in = file_stream(0, fds[0]);
        out = file_stream(1, fds[1]);
        err = file_stream(2, fds[2]);
    }

    ~io_helper() {
        close_unless(in, stdin);
        close_unless(out, stdout);
        close_unless(err, stderr);
    }

    io_helper() = delete;
    io_helper(const io_helper &) = delete;
    io_helper &operator=(const io_helper &) = delete;

private:

    // Closes fp unless it is null (stream creation failed) or is the
    // standard stream itself.
    static void close_unless(FILE *fp, FILE *standard) {
        if (fp && fp != standard) fclose(fp);
    }
};
}
// From here on, stdin/stdout/stderr expand to members of a local variable
// named `io`. Every function below that uses them must therefore have an
// io_helper (or reference to one) in scope under exactly that name.
#undef stdin
#undef stdout
#undef stderr
#define stdin io.in
#define stdout io.out
#define stderr io.err
// Unset name... -- removes each named variable (names are lowercased first).
// Always returns 0.
int builtin_unset(const std::vector<std::string> &tokens, const fdmask &) {

    // unset [no arg] removes ALL variables
    if (tokens.size() == 1) {
        Environment.clear();
        return 0;
    }

    for (size_t i = 1; i < tokens.size(); ++i) {
        std::string key = tokens[i];
        lowercase(key);
        Environment.erase(key);
    }
    return 0;
}
// Set                  -- print every variable as a Set command.
// Set name             -- print one variable; returns 2 if it does not exist.
// Set name value       -- assign.
// Set -e name value    -- assign and mark exported (3.5 supports -e).
// Returns 1 when too many parameters are given, 0 otherwise.
int builtin_set(const std::vector<std::string> &tokens, const fdmask &fds) {

    io_helper io(fds);
    const auto count = tokens.size();

    if (count == 1) {
        for (const auto &entry : Environment) {
            std::string name = quote(entry.first);
            std::string value = quote(entry.second);
            const char *flag = bool(entry.second) ? "-e " : "";
            fprintf(stdout, "Set %s%s %s\n", flag, name.c_str(), value.c_str());
        }
        return 0;
    }

    if (count == 2) {
        std::string name = tokens[1];
        lowercase(name);

        auto found = Environment.find(name);
        if (found == Environment.end()) {
            fprintf(stderr, "### Set - No variable definition exists for %s.\n", name.c_str());
            return 2;
        }

        name = quote(name);
        std::string value = quote(found->second);
        const char *flag = bool(found->second) ? "-e " : "";
        fprintf(stdout, "Set %s%s %s\n", flag, name.c_str(), value.c_str());
        return 0;
    }

    // Only the exact form "Set -e name value" counts as an export.
    const bool exported = (count == 4 && tokens[1] == "-e");

    if (count > 3 && !exported) {
        fputs("### Set - Too many parameters were specified.\n", stderr);
        fputs("# Usage - set [name [value]]\n", stderr);
        return 1;
    }

    std::string name = tokens[exported ? 2 : 1];
    std::string value = tokens[exported ? 3 : 2];
    lowercase(name);

    Environment[name] = EnvironmentEntry(std::move(value), exported);
    return 0;
}
// Shared implementation of Export (export_or_unexport == true) and
// Unexport (false).
//
// With no names: lists the variables whose entry compares equal to
// export_or_unexport; -s prints bare names instead of commands. -r and -s
// together are a conflict.
// With names: sets each existing variable's entry to export_or_unexport;
// unknown names are ignored. Any flag together with names is a conflict.
// Returns 0 on success, 1 on a bad option or a conflict.
static int export_common(bool export_or_unexport, const std::vector<std::string> &tokens, io_helper &io) {

    const char *name = export_or_unexport ? "Export" : "Unexport";

    bool want_r = false;
    bool want_s = false;
    bool error = false;

    std::vector<std::string> argv = getopt(tokens, [&](char c){
        switch(c) {
            case 'r':
            case 'R':
                want_r = true;
                break;
            case 's':
            case 'S':
                want_s = true;
                break;
            default:
                fprintf(stderr, "### %s - \"-%c\" is not an option.\n", name, c);
                error = true;
                break;
        }
    });

    if (error) {
        fprintf(stderr, "# Usage - %s [-r | -s | name...]\n", name);
        return 1;
    }

    const bool listing = argv.empty();
    const bool conflict = listing ? (want_r && want_s) : (want_r || want_s);
    if (conflict) {
        fprintf(stderr, "### %s - Conflicting options or parameters were specified.\n", name);
        fprintf(stderr, "# Usage - %s [-r | -s | name...]\n", name);
        return 1;
    }

    if (listing) {
        // list of exported vars.
        // -r will generate unexport commands for exported variables.
        // -s will only print the names.
        const char *prefix = export_or_unexport ? "Export " : "Unexport ";
        for (const auto &kv : Environment) {
            const std::string& vname = kv.first;
            if (kv.second == export_or_unexport)
                fprintf(stdout, "%s%s\n", want_s ? "" : prefix, quote(vname).c_str());
        }
        return 0;
    }

    // mark as exported.
    for (std::string s : argv) {
        lowercase(s);
        auto iter = Environment.find(s);
        if (iter != Environment.end()) iter->second = export_or_unexport;
    }
    return 0;
}
// Export builtin: forwards to export_common in export mode.
int builtin_export(const std::vector<std::string> &tokens, const fdmask &fds) {
    io_helper io{fds};
    const bool exporting = true;
    return export_common(exporting, tokens, io);
}
// Unexport builtin: forwards to export_common in unexport mode.
int builtin_unexport(const std::vector<std::string> &tokens, const fdmask &fds) {
    io_helper io{fds};
    const bool exporting = false;
    return export_common(exporting, tokens, io);
}
// Echo [-n] word... -- writes the words separated by single spaces.
// -n / -N (anywhere in the list) suppresses the trailing newline and is
// not itself printed. Always returns 0.
int builtin_echo(const std::vector<std::string> &tokens, const fdmask &fds) {

    io_helper io(fds);

    bool newline = true;
    bool first = true;

    for (size_t i = 1; i < tokens.size(); ++i) {
        const std::string &word = tokens[i];

        if (word == "-n" || word == "-N") {
            newline = false;
            continue;
        }

        if (!first) fputs(" ", stdout);
        fputs(word.c_str(), stdout);
        first = false;
    }

    if (newline) fputs("\n", stdout);
    return 0;
}
// Quote [-n] word... -- like Echo, but each word is passed through quote()
// before it is written. Always returns 0.
int builtin_quote(const std::vector<std::string> &tokens, const fdmask &fds) {
    // todo...

    io_helper io(fds);

    bool newline = true;
    bool first = true;

    for (size_t i = 1; i < tokens.size(); ++i) {
        std::string word = tokens[i];

        if (word == "-n" || word == "-N") {
            newline = false;
            continue;
        }

        if (!first) fputs(" ", stdout);
        word = quote(std::move(word));
        fputs(word.c_str(), stdout);
        first = false;
    }

    if (newline) fputs("\n", stdout);
    return 0;
}
// Parameters -- prints every argument (including argv[0]) on its own line
// as "{index} value". Always returns 0.
int builtin_parameters(const std::vector<std::string> &argv, const fdmask &fds) {

    io_helper io(fds);

    for (size_t n = 0; n < argv.size(); ++n) {
        fprintf(stdout, "{%d} %s\n", static_cast<int>(n), argv[n].c_str());
    }
    return 0;
}
// Directory builtin. Only argument validation is implemented so far; the
// actual change/print of the directory is still to be written.
//
// directory [-q]
// directory path
// for relative names, uses {DirectoryPath} (if set) rather than .
// set DirectoryPath ":,{MPW},{MPW}Projects:"
//
// Returns 1 on a bad option, too many parameters, or -q combined with a
// path; 0 otherwise.
int builtin_directory(const std::vector<std::string> &tokens, const fdmask &fds) {

    io_helper io(fds);

    bool q = false;
    bool error = false;

    std::vector<std::string> argv = getopt(tokens, [&](char c){
        if (c == 'q' || c == 'Q') {
            q = true;
            return;
        }
        fprintf(stderr, "### Directory - \"-%c\" is not an option.\n", c);
        error = true;
    });

    if (error) {
        fputs("# Usage - Directory [-q | directory]\n", stderr);
        return 1;
    }

    if (argv.size() > 1) {
        fputs("### Directory - Too many parameters were specified.\n", stderr);
        fputs("# Usage - Directory [-q | directory]\n", stderr);
        return 1;
    }

    const bool changing = (argv.size() == 1);
    if (changing && q) {
        fputs("### Directory - Conflicting options or parameters were specified.\n", stderr);
        return 1;
    }

    // changing: cd -- not implemented yet.
    // otherwise: pwd -- not implemented yet.
    return 0;
}
// True when the token type is one of the assignment operators =, += or -=.
static bool is_assignment(int type) {
    return type == '=' || type == '+=' || type == '-=';
}
int builtin_evaluate(std::vector<token> &&tokens, const fdmask &fds) {
// evaluate expression
// evaluate variable = expression
// evaluate variable += expression
// evaluate variable -= expression
// flags -- -h -o -b -- print in hex, octal, or binary
// convert the arguments to a stack.
int output = 'd';
io_helper io(fds);
std::reverse(tokens.begin(), tokens.end());
// remove 'Evaluate'
tokens.pop_back();
// check for -h -x -o
if (tokens.size() >= 2 && tokens.back().type == '-') {
const token &t = tokens[tokens.size() - 2];
if (t.type == token::text && t.string.length() == 1) {
int flag = tolower(t.string[0]);
switch(flag) {
case 'o':
case 'h':
case 'b':
output = flag;
tokens.pop_back();
tokens.pop_back();
}
}
}
if (tokens.size() >= 2 && tokens.back().type == token::text)
{
int type = tokens[tokens.size() -2].type;
if (is_assignment(type)) {
std::string name = tokens.back().string;
lowercase(name);
tokens.pop_back();
tokens.pop_back();
int32_t i = evaluate_expression("Evaluate", std::move(tokens));
switch(type) {
case '=':
Environment[name] = std::to_string(i);
break;
case '+=':
case '-=':
{
value old;
auto iter = Environment.find(name);
if (iter != Environment.end()) old = (const std::string &)iter->second;
switch(type) {
case '+=':
i = old.to_number() + i;
break;
case '-=':
i = old.to_number() - i;
break;
}
std::string s = std::to_string(i);
if (iter == Environment.end())
Environment.emplace(std::move(name), std::move(s));
else iter->second = std::move(s);
}
break;
}
return 0;
}
}
int32_t i = evaluate_expression("Evaluate", std::move(tokens));
// todo -- format based on -h, -o, or -b flag.
if (output == 'h') {
fprintf(stdout, "0x%08x\n", i);
return 0;
}
if (output == 'b') {
fputc('0', stdout);
fputc('b', stdout);
for (int j = 0; j < 32; ++j) {
fputc(i & 0x80000000 ? '1' : '0', stdout);
i <<= 1;
}
fputc('\n', stdout);
return 0;
}
if (output == 'o') {
// octal.
fprintf(stdout, "0%o\n", i);
return 0;
}
fprintf(stdout, "%d\n", i);
return 0;
}

123
mpw-shell-command.rl Normal file
View File

@ -0,0 +1,123 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <memory>
#include <stdio.h>
#include <assert.h>
#include "mpw-shell.h"
%%{
machine classify;
alphtype unsigned char;
# Scanner that recognizes the shell's control keywords at the current
# position. Keywords are matched case-insensitively and must be followed by
# a space/tab or by end of input. Each match returns the matching command_*
# constant from the enclosing function.
ws = [ \t];
IF = /if/i;
ELSE = /else/i;
END = /end/i;
EVALUATE = /evaluate/i;
main := |*
IF %eof{ return command_if; };
IF ws => {return command_if; };
ELSE %eof{ return command_else;};
ELSE ws => { return command_else; };
ELSE ws+ IF %eof{ return command_else_if; };
ELSE ws+ IF ws => {return command_else_if; };
END %eof{ return command_end; };
END ws => {return command_end; };
EVALUATE %eof{ return command_evaluate; };
EVALUATE ws => {return command_evaluate; };
*|;
}%%
// Classifies a line by its leading keyword using the `classify` Ragel
// machine. The scanner actions return the matching command_* constant
// directly; 0 is returned when no action fires.
int classify(const std::string &line) {
%% write data;
// Scanner state variables required by the generated code.
int cs;
int act;
const unsigned char *p = (const unsigned char *)line.data();
const unsigned char *pe = (const unsigned char *)line.data() + line.size();
// The whole line is available at once, so end-of-buffer is end-of-input.
const unsigned char *eof = pe;
const unsigned char *te, *ts;
%%write init;
%%write exec;
return 0;
}
/*
 * Builds a singly linked list of commands (one per non-empty line) and
 * returns its head.
 *
 * While linking, shell-special syntax (currently if / else / end only) is
 * tracked on a stack so every if / else / else-if gets an `alternate`
 * pointer to the next branch or to the closing end. That makes executing
 * them easier.
 *
 * Throws std::runtime_error for an else or end outside an if block, and
 * for an if that is never closed.
 */
command_ptr build_command(const std::vector<std::string> &lines) {

    std::vector<command_ptr> open_blocks;

    command_ptr first;
    command_ptr last;

    for (const auto &text : lines) {
        if (text.empty()) continue;

        const int kind = classify(text);
        command_ptr node = std::make_shared<command>(kind, text);

        // append to the list.
        if (!first) first = node;
        if (last) last->next = node;
        last = node;

        // if stack...
        if (kind == command_if) {
            open_blocks.push_back(node);
        }
        else if (kind == command_else || kind == command_else_if) {
            if (open_blocks.empty()) {
                throw std::runtime_error("### MPW Shell - Else must be within if ... end.");
            }
            // chain the previous branch to this one; this branch is now the open one.
            open_blocks.back()->alternate = node;
            open_blocks.back() = node;
        }
        else if (kind == command_end) {
            if (open_blocks.empty()) {
                throw std::runtime_error("### MPW Shell - Extra end command.");
            }
            open_blocks.back()->alternate = node;
            open_blocks.pop_back();
        }
    }

    if (!open_blocks.empty()) {
        throw std::runtime_error("### MPW Shell - Unterminated if command.");
    }

    return first;
}

301
mpw-shell-commands.c Normal file
View File

@ -0,0 +1,301 @@
#line 1 "mpw-shell-commands.rl"
#include <vector>
#include <string>
#include <unordered_map>
#include <memory>
#include <stdio.h>
// NOTE(review): this file is Ragel output (see the #line directives) and is
// not listed in the CMake build. As written it cannot compile: `command` is
// named in the typedefs before any declaration of it is visible here, and
// `} = 0;` below is not a valid way to end an enum declaration. Confirm
// whether this file is stale and can be dropped.
typedef std::shared_ptr<command> command_ptr;
typedef std::weak_ptr<command> weak_command_ptr;
// One line of shell input, linked to the following line and, for
// if / else / end, to the next branch of the same block.
class command {
enum type {
command_if = 1,
command_else,
command_else_if,
command_end
} = 0;
std::string line;
command_ptr next;
weak_command_ptr alternate; // if -> else -> end.
};
#line 49 "mpw-shell-commands.rl"
// Generated by Ragel from mpw-shell-commands.rl: a goto-driven state machine
// that matches a leading if / else / else if / end keyword (either case,
// followed by space, tab or end of input) and returns the corresponding
// command_* constant, or 0 when nothing matches.
// Do not edit the machine by hand; change the .rl source and regenerate.
// NOTE(review): `act` is assigned in the init block below but never declared
// in this function, so this generated copy does not compile as-is.
int classify(const std::string &line) {
#line 35 "mpw-shell-commands.c"
static const int classify_start = 8;
static const int classify_first_final = 8;
static const int classify_error = 0;
static const int classify_en_main = 8;
#line 55 "mpw-shell-commands.rl"
int cs;
const unsigned char *p = (const unsigned char *)line.data();
const unsigned char *pe = (const unsigned char *)line.data() + line.size();
const unsigned char *eof = pe;
const unsigned char *te, *ts;
#line 52 "mpw-shell-commands.c"
{
cs = classify_start;
ts = 0;
te = 0;
act = 0;
}
#line 63 "mpw-shell-commands.rl"
#line 63 "mpw-shell-commands.c"
{
if ( p == pe )
goto _test_eof;
switch ( cs )
{
// trN labels: transition actions (token end bookkeeping and the returns).
tr5:
#line 40 "mpw-shell-commands.rl"
{{p = ((te))-1;}{ return command_else; }}
goto st8;
tr13:
#line 39 "mpw-shell-commands.rl"
{ return command_else;}
#line 39 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr14:
#line 39 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr16:
#line 40 "mpw-shell-commands.rl"
{te = p;p--;{ return command_else; }}
goto st8;
tr17:
#line 42 "mpw-shell-commands.rl"
{ return command_else_if; }
#line 42 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr18:
#line 42 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr19:
#line 43 "mpw-shell-commands.rl"
{te = p+1;{return command_else_if; }}
goto st8;
tr20:
#line 45 "mpw-shell-commands.rl"
{ return command_end; }
#line 45 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr21:
#line 45 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr22:
#line 46 "mpw-shell-commands.rl"
{te = p+1;{return command_end; }}
goto st8;
tr23:
#line 36 "mpw-shell-commands.rl"
{ return command_if; }
#line 36 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr24:
#line 36 "mpw-shell-commands.rl"
{te = p;p--;}
goto st8;
tr25:
#line 37 "mpw-shell-commands.rl"
{te = p+1;{return command_if; }}
goto st8;
// stN labels: machine states. State 8 is the start state; the first byte
// must be E/e or I/i.
st8:
#line 1 "NONE"
{ts = 0;}
if ( ++p == pe )
goto _test_eof8;
case 8:
#line 1 "NONE"
{ts = p;}
#line 137 "mpw-shell-commands.c"
switch( (*p) ) {
case 69u: goto st1;
case 73u: goto st7;
case 101u: goto st1;
case 105u: goto st7;
}
goto st0;
// State 0 is the error state: stop scanning.
st0:
cs = 0;
goto _out;
st1:
if ( ++p == pe )
goto _test_eof1;
case 1:
switch( (*p) ) {
case 76u: goto st2;
case 78u: goto st6;
case 108u: goto st2;
case 110u: goto st6;
}
goto st0;
st2:
if ( ++p == pe )
goto _test_eof2;
case 2:
switch( (*p) ) {
case 83u: goto st3;
case 115u: goto st3;
}
goto st0;
st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
switch( (*p) ) {
case 69u: goto st9;
case 101u: goto st9;
}
goto st0;
st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
switch( (*p) ) {
case 9u: goto tr15;
case 32u: goto tr15;
}
goto tr14;
tr15:
#line 1 "NONE"
{te = p+1;}
goto st10;
st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
#line 194 "mpw-shell-commands.c"
switch( (*p) ) {
case 9u: goto st4;
case 32u: goto st4;
case 73u: goto st5;
case 105u: goto st5;
}
goto tr16;
st4:
if ( ++p == pe )
goto _test_eof4;
case 4:
switch( (*p) ) {
case 9u: goto st4;
case 32u: goto st4;
case 73u: goto st5;
case 105u: goto st5;
}
goto tr5;
st5:
if ( ++p == pe )
goto _test_eof5;
case 5:
switch( (*p) ) {
case 70u: goto st11;
case 102u: goto st11;
}
goto tr5;
st11:
if ( ++p == pe )
goto _test_eof11;
case 11:
switch( (*p) ) {
case 9u: goto tr19;
case 32u: goto tr19;
}
goto tr18;
st6:
if ( ++p == pe )
goto _test_eof6;
case 6:
switch( (*p) ) {
case 68u: goto st12;
case 100u: goto st12;
}
goto st0;
st12:
if ( ++p == pe )
goto _test_eof12;
case 12:
switch( (*p) ) {
case 9u: goto tr22;
case 32u: goto tr22;
}
goto tr21;
st7:
if ( ++p == pe )
goto _test_eof7;
case 7:
switch( (*p) ) {
case 70u: goto st13;
case 102u: goto st13;
}
goto st0;
st13:
if ( ++p == pe )
goto _test_eof13;
case 13:
switch( (*p) ) {
case 9u: goto tr25;
case 32u: goto tr25;
}
goto tr24;
}
// Input ran out inside state N: record the state, then run any end-of-input
// action for it below.
_test_eof8: cs = 8; goto _test_eof;
_test_eof1: cs = 1; goto _test_eof;
_test_eof2: cs = 2; goto _test_eof;
_test_eof3: cs = 3; goto _test_eof;
_test_eof9: cs = 9; goto _test_eof;
_test_eof10: cs = 10; goto _test_eof;
_test_eof4: cs = 4; goto _test_eof;
_test_eof5: cs = 5; goto _test_eof;
_test_eof11: cs = 11; goto _test_eof;
_test_eof6: cs = 6; goto _test_eof;
_test_eof12: cs = 12; goto _test_eof;
_test_eof7: cs = 7; goto _test_eof;
_test_eof13: cs = 13; goto _test_eof;
_test_eof: {}
if ( p == eof )
{
switch ( cs ) {
case 9: goto tr13;
case 10: goto tr16;
case 4: goto tr5;
case 5: goto tr5;
case 11: goto tr17;
case 12: goto tr20;
case 13: goto tr23;
}
}
_out: {}
}
#line 65 "mpw-shell-commands.rl"
// No keyword matched.
return 0;
}

295
mpw-shell-execute.cpp Normal file
View File

@ -0,0 +1,295 @@
#include "mpw-shell.h"
#include "fdset.h"
#include "value.h"
#include <cctype>
#include <cassert>
#include <cerrno>
#include <algorithm>
#include <functional>
#include <unistd.h>
#include <sys/wait.h>
#include <sysexits.h>
/*
* Relevant shell variables (not currently supported)
*
* Echo {Echo} # control the echoing of commands to diagnostic output
* Echo {Exit} # control script termination based on {Status}
*
*/
typedef std::vector<std::string> vs;
namespace {
// Lowercases s in place and returns a reference to it.
// Bytes are passed to std::tolower as unsigned char: calling it with a
// negative char value (any byte >= 0x80 where char is signed) is undefined.
std::string &lowercase(std::string &s) {
    std::transform(s.begin(), s.end(), s.begin(),
        [](unsigned char c){ return static_cast<char>(std::tolower(c)); });
    return s;
}
// Dispatch table for builtin commands. Keys are lowercase command names;
// lookups lowercase the command name before searching (see execute_one).
// Each handler receives the argument list and the descriptors to use.
// Evaluate is not in this table; it has its own command type and is run
// through execute_evaluate.
std::unordered_map<std::string, int (*)(const std::vector<std::string> &, const fdmask &)> builtins = {
{"directory", builtin_directory},
{"echo", builtin_echo},
{"parameters", builtin_parameters},
{"quote", builtin_quote},
{"set", builtin_set},
{"unset", builtin_unset},
{"export", builtin_export},
{"unexport", builtin_unexport},
};
}
// (status, command) pair returned by the block-level executors.
typedef std::pair<int, command_ptr> icp;
// Forward declaration: execute_if and execute_all call each other.
icp execute_all(command_ptr cmd);
// Executes an if / else if / else / end block starting at the if command.
// Walks the chain of alternates, evaluates each condition in turn and runs
// the body of the first branch whose condition is non-zero.
// returns status and pointer to the next command to execute.
// (The returned pointer is the block's end command; the caller advances
// past it.)
icp execute_if(command_ptr cmd) {
assert(cmd && cmd->type == command_if);
// evaluate condition...
// skip to else or end.
command_ptr head(cmd);
int status = 0;
// find the end pointer.
// if ... end > file.text
// redirects all output within the block.
command_ptr end = head;
while (end && end->type != command_end) {
end = end->alternate.lock();
}
fdmask fds; // todo -- inherit from block, can be parsed from end line.
fprintf(stdout, " %s ... %s\n", cmd->string.c_str(), end ? end->string.c_str() : "");
// todo -- indent levels.
while(cmd && cmd->type != command_end) {
int32_t e;
std::string s = cmd->string;
s = expand_vars(s, Environment);
auto tokens = tokenize(s, true);
// reversed so the first token is at the back and can be popped cheaply.
std::reverse(tokens.begin(), tokens.end());
e = 0;
// NOTE(review): status is reset for every branch, so an error status set
// while evaluating one branch is overwritten if another branch follows --
// confirm this is intended.
status = 0;
switch(cmd->type) {
case command_else_if:
// drop the leading "else", then fall through to drop the "if".
tokens.pop_back();
case command_if:
tokens.pop_back();
try {
e = evaluate_expression("If", std::move(tokens));
} catch (std::exception &ex) {
// a failed evaluation is reported and treated as a false condition.
fprintf(stderr, "%s\n", ex.what());
status = -5;
}
break;
case command_else:
// a plain else is always taken, unless extra tokens follow the keyword.
e = 1;
if (tokens.size() > 1) {
fprintf(stderr, "### Else - Missing if keyword.\n");
fprintf(stderr, "# Usage - Else [if expression...]\n");
e = 0;
status = -3;
}
}
if (e) {
// run this branch's body; the command at which execute_all stopped is not needed.
command_ptr tmp;
std::tie(status, tmp) = execute_all(cmd->next);
break;
}
// skip to next condition.
cmd = cmd->alternate.lock();
}
// todo -- print but don't execute remaining alternates
// print the end tokens... [ doesn't include other tokens.]
fprintf(stdout, " End\n");
return std::make_pair(status, end); // return end token -- will advance later.
}
// Runs an Evaluate command: expands variables in its text, echoes the
// expanded line, tokenizes it and hands the tokens to builtin_evaluate.
// Returns builtin_evaluate's status.
int execute_evaluate(command_ptr cmd) {

    fdmask fds; // todo -- inherit from block.

    const std::string source = cmd->string;
    const std::string expanded = expand_vars(source, Environment);

    fprintf(stdout, " %s\n", expanded.c_str());

    auto tokens = tokenize(expanded, true);
    return builtin_evaluate(std::move(tokens), fds);
}
// Runs an external command as `mpw <argv...>` in a child process and waits
// for it.
//
// fds: descriptors to install as the child's stdin/stdout/stderr.
// Returns the child's exit status, or -1 if it was killed by a signal.
// Terminates the shell with EX_OSERR if fork or waitpid fails.
int execute_external(const std::vector<std::string> &argv, const fdmask &fds) {

    // execvp wants char *const[] but never modifies the strings, so the
    // argument vector can point straight into argv (which outlives the
    // call) -- no copies to allocate or free.
    std::vector<char *> cargv;
    cargv.reserve(argv.size() + 2);

    cargv.push_back(const_cast<char *>("mpw"));
    //cargv.push_back((char *)"--shell");

    for (const std::string &s : argv) {
        cargv.push_back(const_cast<char *>(s.c_str()));
    }
    cargv.push_back(nullptr);

    const pid_t pid = fork();
    if (pid < 0) {
        perror("fork: ");
        exit(EX_OSERR);
    }

    if (pid == 0) {
        // also export environment...
        // handle any indirection...
        fds.dup();

        execvp(cargv.front(), cargv.data());
        perror("execvp: ");
        // _exit, not exit: the child must not flush stdio buffers or run
        // exit handlers it inherited from the shell.
        _exit(EX_OSERR);
    }

    for(;;) {
        int status;
        const pid_t ok = waitpid(pid, &status, 0);
        if (ok < 0) {
            if (errno == EINTR) continue;
            perror("waitpid:");
            exit(EX_OSERR);
        }

        if (WIFEXITED(status)) return WEXITSTATUS(status);
        if (WIFSIGNALED(status)) return -1;

        fprintf(stderr, "waitpid - unexpected result\n");
        exit(EX_OSERR);
    }
}
// Executes one ordinary (non-control) command line.
// The line is variable-expanded, echoed, tokenized and parsed into
// arguments plus redirections, then dispatched to a builtin if the
// lowercased command name is in the table, otherwise run externally.
// Returns the command's status; a null command or a line that yields no
// tokens or arguments is a no-op with status 0.
int execute_one(command_ptr cmd) {

    if (!cmd) return 0;

    assert(cmd->type == 0);

    // todo -- before variable expansion,
    // expand |, ||, && control structures.
    // (possibly when classifing.)

    std::string s = cmd->string;
    s = expand_vars(s, Environment);

    fprintf(stdout, " %s\n", s.c_str());

    auto tokens = tokenize(s);
    // Nothing left after expansion: parse_tokens would pop its first token
    // from an empty vector.
    if (tokens.empty()) return 0;

    process p;
    parse_tokens(std::move(tokens), p);
    if (p.arguments.empty()) return 0;

    fdmask fds = p.fds.to_mask();

    std::string name = p.arguments.front();
    lowercase(name);

    auto iter = builtins.find(name);
    if (iter != builtins.end()) {
        return iter->second(p.arguments, fds);
    }

    return execute_external(p.arguments, fds);
}
// Executes commands in sequence starting at cmd.
//
// Stops and returns (status, cmd) when it reaches an else / else if / end,
// leaving that command for the enclosing execute_if; otherwise runs to the
// end of the list and returns (status, null).
// After every command the "status" variable is updated, and a non-zero
// status aborts execution by throwing std::runtime_error.
icp execute_all(command_ptr cmd) {

    if (!cmd) return std::make_pair(0, cmd);

    // Must start defined: an empty block (the first command is already an
    // else/end) returns it without running anything.
    int status = 0;

    while(cmd) {

        unsigned type = cmd->type;

        switch(type)
        {
            case command_evaluate:
                status = execute_evaluate(cmd);
                break;

            default:
                status = execute_one(cmd);
                break;

            case command_if:
                // cmd becomes the block's end command; it is stepped over below.
                std::tie(status, cmd) = execute_if(cmd);
                break;

            case command_end:
            case command_else:
            case command_else_if:
                return std::make_pair(status, cmd);
        }

        Environment["status"] = std::to_string(status);
        if (status != 0) {
            // only if Environment["Exit"] ?
            throw std::runtime_error("### MPW Shell - Execution of input terminated.");
        }

        // execute_if yields a null pointer if no end command was found.
        if (!cmd) break;
        cmd = cmd->next;
    }

    return std::make_pair(status, cmd);
}
// Entry point: executes the command list and returns the final status.
// The command at which execution stopped is discarded.
int execute(command_ptr cmd) {
    const icp result = execute_all(cmd);
    return result.first;
}

132
mpw-shell-expand.rl Normal file
View File

@ -0,0 +1,132 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <stdio.h>
#include "mpw-shell.h"
%%{
machine line_parser;
alphtype unsigned char;
# Copies the input to `line`, replacing {name} references with the value
# found in `env`. Single-quoted strings are copied verbatim; inside
# double-quoted strings variables are still expanded.
# NOTE(review): 0xb6 is presumably the MPW escape character in the Mac
# Roman encoding -- confirm.
escape = 0xb6;
ws = [ \t];
nl = '\n';
action push_back {
line.push_back(fc);
}
action push_back_escape {
line.push_back(escape);
line.push_back(fc);
}
# single-quoted string: copied as-is, quotes included; may not span lines.
sstring =
['] $push_back
( (any-nl-[']) $push_back )*
['] $push_back
$err{
fprintf(stderr, "### MPW Shell - 's must occur in pairs.\n");
}
;
# same quoting logic as ' string
# The name between the braces is collected in `var`. An unknown name is
# passed through unchanged as {name}; an empty {} produces nothing.
vstring =
'{'
( (any-nl-'}') ${var.push_back(fc); } )*
'}'
${
if (!var.empty()) {
// flag to pass through vs "" ?
auto iter = env.find(var);
if (iter == env.end()) {
line.push_back('{');
line.append(var);
line.push_back('}');
}
else {
line.append((std::string)iter->second);
}
}
var.clear();
}
$err{
fprintf(stderr, "### MPW Shell - {s must occur in pairs.\n");
}
;
# double-quoted string.
# escape \n is ignored. others do nothing.
dstring =
["] $push_back
(
escape (
nl ${ /* esc newline */ }
|
(any-nl) $push_back_escape
)
|
vstring
|
(any-escape-nl-["{]) $push_back
)* ["] $push_back
$err{
fprintf(stderr, "### MPW Shell - \"s must occur in pairs.\n");
}
;
# top level: quoted strings, variable references, escaped characters
# (kept together with their escape), and everything else copied through.
main :=
(
sstring
|
dstring
|
vstring
|
escape any $push_back_escape
|
(any-['"{]) $push_back
)*
;
}%%
%% write data;
/*
 * Expands {name} variable references in s using env and returns the
 * resulting line. A string without any '{' is returned unchanged without
 * running the machine.
 *
 * has to be done separately since you can do dumb stuff like:
 * set q '"' ; echo {q} dsfsdf"
 *
 * NOTE(review): the machine's final state (cs) is not checked after
 * execution, so on a quoting error the text accumulated so far is returned
 * after the message has been printed -- confirm this is intended.
 */
std::string expand_vars(const std::string &s, const std::unordered_map<std::string, EnvironmentEntry> &env) {
if (s.find('{') == s.npos) return s;
// name of the variable currently being read by the vstring rule.
std::string var;
// output accumulated by the machine's actions.
std::string line;
int cs;
const unsigned char *p = (const unsigned char *)s.data();
const unsigned char *pe = (const unsigned char *)s.data() + s.size();
const unsigned char *eof = pe;
%%write init;
%%write exec;
return line;
}

406
mpw-shell-parser.cpp Normal file
View File

@ -0,0 +1,406 @@
#include "mpw-shell.h"
#include "fdset.h"
#include "value.h"
#include <unistd.h>
#include <fcntl.h>
/*
* I'm sick of fighting with lemon. Just generate it by hand.
*
*/
// Removes the last element of v and returns it (moved out, not copied).
// v must not be empty.
template<class T>
T pop(std::vector<T> &v) {
    T last(std::move(v.back()));
    v.pop_back();
    return last;
}
// Opens name with the given flags (files are created with mode 0666) and
// returns the descriptor. Throws std::runtime_error naming the file if it
// cannot be opened.
int open(const std::string &name, int flags) {

    // dup2 does not copy the O_CLOEXEC flag so it's safe to use.
    const int fd = ::open(name.c_str(), flags | O_CLOEXEC, 0666);
    if (fd >= 0) return fd;

    throw std::runtime_error("### MPW Shell - Unable to open \"" + name + "\".");
}
void parse_tokens(std::vector<token> &&tokens, process &p) {
fdset fds;
std::vector<std::string> argv;
std::reverse(tokens.begin(), tokens.end());
argv.reserve(tokens.size());
// first token is always treated as a string.
token t = pop(tokens);
argv.emplace_back(std::move(t.string));
while(!tokens.empty()) {
t = pop(tokens);
switch (t.type) {
// >, >> -- redirect stdout.
case '>':
case '>>':
{
int flags;
if (t.type == '>') flags = O_WRONLY | O_CREAT | O_TRUNC;
else flags = O_WRONLY | O_CREAT | O_APPEND;
if (tokens.empty()) {
throw std::runtime_error("### MPW Shell - Missing file name.");
}
token name = pop(tokens);
int fd = open(name.string, flags);
fds.set(1, fd);
}
break;
// < -- redirect stdin.
case '<':
{
int flags = O_RDONLY;
if (tokens.empty()) {
throw std::runtime_error("### MPW Shell - Missing file name.");
}
token name = pop(tokens);
int fd = open(name.string, flags);
fds.set(0, fd);
}
break;
default:
argv.emplace_back(std::move(t.string));
break;
}
}
p.arguments = std::move(argv);
p.fds = std::move(fds);
}
// Evaluates a token stream as an integer expression.
//
// The tokens are consumed from the back of the vector, so callers pass
// them in reverse order. `name` is the command name used as the prefix of
// error messages.
class expression_parser {
public:

    expression_parser(const std::string &n, std::vector<token> &&t) :
        name(n), tokens(std::move(t))
    {}

    expression_parser(const expression_parser &) = delete;
    expression_parser(expression_parser &&) = delete;

    expression_parser& operator=(const expression_parser &) = delete;
    expression_parser& operator=(expression_parser &&) = delete;

    // returns integer value of the expression.
    int32_t evaluate();

private:

    value terminal();
    value unary();
    value binary();

    value eval(int op, value &lhs, value &rhs);

    // Error helpers; each throws and never returns.
    [[noreturn]] void expect_binary_operator();
    [[noreturn]] void end_of_expression();
    [[noreturn]] void divide_by_zero();

    // Type of the next token without consuming it (token::eof when empty).
    int peek_type() const;
    // Consumes and returns the next token (an eof token when empty).
    token next();

    // Binding strength of a binary operator; 0 means "not a binary operator".
    static int precedence(int);

    // Discards the next token, if there is one.
    void skip() {
        if (!tokens.empty()) tokens.pop_back();
    }

    // Stored by value: a reference member would dangle whenever the parser
    // outlives the constructor argument (e.g. a temporary built from a
    // string literal).
    const std::string name;
    std::vector<token> tokens;
};
// Type of the next token, or token::eof when none are left.
int expression_parser::peek_type() const {
    if (!tokens.empty()) return tokens.back().type;
    return token::eof;
}
// Consumes and returns the next token; yields an empty eof token when
// none are left.
token expression_parser::next() {
    if (!tokens.empty()) return pop(tokens);
    return token("", token::eof); // error?
}
void expression_parser::expect_binary_operator() {
token t = next();
std::string error;
error = "### " + name;
error += " - Expected a binary operator when \"";
error += t.string;
error += "\" was encountered.";
throw std::runtime_error(error);
}
// Throws std::runtime_error reporting that the expression ended too early.
void expression_parser::end_of_expression() {
    throw std::runtime_error("### " + name + " - Unexpected end of expression.");
}
// Throws std::runtime_error reporting a division by zero.
void expression_parser::divide_by_zero() {
    throw std::runtime_error("### " + name + " - Attempt to divide by zero.");
}
// Parses and evaluates a sequence of unary operands joined by binary
// operators, using an operand stack and an operator stack.
// A smaller precedence number binds tighter; operators of equal precedence
// are applied left to right. Parsing stops at end of input or at ')'.
value expression_parser::binary() {

    std::vector<value> operands;
    std::vector<std::pair<int, int>> pending; // (operator, precedence)

    // Applies the operator on top of `pending` to the top two operands.
    auto reduce_top = [&]() {
        const int op = pending.back().first;
        pending.pop_back();

        value rhs = pop(operands);
        value lhs = pop(operands);
        operands.emplace_back(eval(op, lhs, rhs));
    };

    operands.emplace_back(unary());

    for(;;) {
        // check for an operator.
        const int type = peek_type();
        if (type == token::eof || type == ')') break;

        const int prec = precedence(type);
        if (!prec) expect_binary_operator();
        skip();

        // reduce everything that binds at least as tightly as the new operator.
        while (!pending.empty() && pending.back().second <= prec) {
            reduce_top();
        }

        pending.emplace_back(type, prec);
        operands.emplace_back(unary());
    }

    // reduce...
    while (!pending.empty()) {
        reduce_top();
    }

    if (operands.size() != 1) throw std::runtime_error("binary stack error");
    return pop(operands);
}
// Precedence level of a binary operator token type.
// A smaller number binds tighter; 0 means the type is not a binary operator.
int expression_parser::precedence(int op) {
	switch (op) {
	// multiplicative
	case '*': case '%': case '/':
		return 3;
	// additive
	case '+': case '-':
		return 4;
	// shifts
	case '>>': case '<<':
		return 5;
	// relational
	case '<': case '<=': case '>': case '>=':
		return 6;
	// equality
	case '==': case '!=':
	case token::equivalent: case token::not_equivalent:
		return 7;
	// bitwise
	case '&':
		return 8;
	case '^':
		return 9;
	case '|':
		return 10;
	// logical
	case '&&':
		return 11;
	case '||':
		return 12;
	default:
		break;
	}
	return 0;
}
// Applies the binary operator `op` to lhs and rhs and returns the result.
//
// Arithmetic, relational, shift and bitwise operators convert both operands
// with to_number(), which throws when an operand is not a number.
// && and || use to_number(1), so a non-numeric operand counts as 1.
// == and != compare the operands' strings, except that "" and "0" are
// treated as equal to each other.
//
// Calls divide_by_zero() (which throws) for / and % when rhs is 0, and
// throws std::runtime_error("unimplemented op") for any other operator.
value expression_parser::eval(int op, value &lhs, value &rhs) {
	switch (op) {
	case '*':
		return lhs.to_number() * rhs.to_number();
	case '/':
		if (!rhs.to_number()) divide_by_zero();
		return lhs.to_number() / rhs.to_number();
	case '%':
		if (!rhs.to_number()) divide_by_zero();
		return lhs.to_number() % rhs.to_number();
	case '+':
		return lhs.to_number() + rhs.to_number();
	case '-':
		return lhs.to_number() - rhs.to_number();
	case '>':
		return lhs.to_number() > rhs.to_number();
	case '<':
		return lhs.to_number() < rhs.to_number();
	case '<=':
		return lhs.to_number() <= rhs.to_number();
	case '>=':
		return lhs.to_number() >= rhs.to_number();
	case '>>':
		return lhs.to_number() >> rhs.to_number();
	case '<<':
		// left shift (this case previously shifted right).
		return lhs.to_number() << rhs.to_number();
	// logical || . NaN ok
	case '||':
		return lhs.to_number(1) || rhs.to_number(1);
	// logical && . NaN ok
	case '&&':
		return lhs.to_number(1) && rhs.to_number(1);
	case '|':
		return lhs.to_number() | rhs.to_number();
	case '&':
		return lhs.to_number() & rhs.to_number();
	case '^':
		return lhs.to_number() ^ rhs.to_number();
	case '==':
		// string ==. 0x00==0 -> 0
		// as a special case, 0=="". go figure.
		if (lhs.string == "" && rhs.string == "0") return 1;
		if (lhs.string == "0" && rhs.string == "") return 1;
		return lhs.string == rhs.string;
	case '!=':
		if (lhs.string == "" && rhs.string == "0") return 0;
		if (lhs.string == "0" && rhs.string == "") return 0;
		return lhs.string != rhs.string;
	}
	// todo...
	throw std::runtime_error("unimplemented op");
}
// Parses a (possibly nested) prefix operator: - + ! ~ .
// Anything else is handed to terminal().
value expression_parser::unary() {
	const int type = peek_type();
	const bool is_prefix = (type == '-' || type == '+' || type == '!' || type == '~');
	if (!is_prefix) return terminal();

	next();
	value operand = unary();

	switch (type) {
	case '-':
		operand = -operand.to_number();
		break;
	case '~':
		operand = ~operand.to_number();
		break;
	case '!':
		operand = !operand.to_number(1); // logical !, NaN ok.
		break;
	default:
		// + is a nop.. doesn't even check if it's a number.
		break;
	}
	return operand;
}
// Parses a text token or a parenthesised sub-expression.
// Any other token is left in place and an empty value is returned.
value expression_parser::terminal() {
	switch (peek_type()) {
	case token::text: {
		token t = next();
		return value(std::move(t.string));
	}
	case '(': {
		next();
		value inner = binary();
		// the sub-expression must be closed.
		if (peek_type() != ')') end_of_expression();
		next();
		return inner;
	}
	default:
		break;
	}
	// insert a fake token.
	return value();
}
int32_t expression_parser::evaluate() {
if (tokens.empty()) return 0;
value v = binary();
if (!tokens.empty()) {
if (tokens.back().type == ')')
throw std::runtime_error("### MPW Shell - Extra ) command.");
throw std::runtime_error("evaluation stack error."); // ?? should be caught above.
}
return v.to_number(1);
}
// Evaluates `tokens` as an expression; `name` is used as the prefix of error messages.
int32_t evaluate_expression(const std::string &name, std::vector<token> &&tokens) {
	return expression_parser(name, std::move(tokens)).evaluate();
}

75
mpw-shell-quote.rl Normal file
View File

@ -0,0 +1,75 @@
#include <string>
// Returns true if s contains at least one character outside the `normal`
// set [A-Za-z0-9_.:]; returns false otherwise (including for an empty string).
bool must_quote(const std::string &s){
%%{
machine must_quote;
alphtype unsigned char;
# characters that need quoting.
# NOTE(review): `quotable` is not referenced by main below.
quotable = (
[ \t\r\n]
|
0x00
|
[0x80-0xff]
|
[+#;&|()'"/\\{}`?*<>]
|
'-'
|
'['
|
']'
);
#simpler just to say what's ok.
normal = [A-Za-z0-9_.:];
# the first character that is not `normal` returns true straight out of the function.
main :=
(
normal
|
(any-normal) ${return true;}
)*
;
}%%
%%write data;
int cs;
const unsigned char *p = (const unsigned char *)s.data();
const unsigned char *pe = (const unsigned char *)s.data() + s.size();
const unsigned char *eof = nullptr;
%%write init;
%%write exec;
// every character was `normal`.
return false;
}
// Disabled (#if 0): a copy-then-forward wrapper meant to call a
// quote(std::string &&) overload, which this file does not define.
#if 0
std::string quote(const std::string &s) {
std::string tmp(s);
return quote(std::move(tmp));
}
#endif
// Returns s quoted for the shell.
// A string for which must_quote() is false is returned unchanged. Otherwise
// the result is s wrapped in single quotes, with each embedded ' replaced by
// the sequence: close quote, escape character, ', reopen quote.
std::string quote(const std::string &s) {
	const char q = '\'';
	// 0xb6 is the shell escape character used by the tokenizer and line
	// parser (this was 0xd8, which those machines do not treat as an escape).
	const char *escape_q = "'\xb6''";

	if (!must_quote(s)) return s;

	std::string out;
	out.reserve(s.length() + (s.length() >> 1));
	out.push_back(q);
	for (char c : s) {
		if (c == q) {
			out.append(escape_q);
		} else {
			out.push_back(c);
		}
	}
	out.push_back(q);
	return out;
}

374
mpw-shell-read.rl Normal file
View File

@ -0,0 +1,374 @@
#include "mpw-shell.h"
#include <unistd.h>
#include <fcntl.h>
%%{
# Scanner that recognises a leading shell keyword (case-insensitive).
# Each keyword must be followed by whitespace or by the end of the input.
machine classify;
alphtype unsigned char;
ws = [ \t];
IF = /if/i;
ELSE = /else/i;
END = /end/i;
# NOTE(review): BEGIN is defined but no pattern below uses it.
BEGIN = /begin/i;
EVALUATE = /evaluate/i;
# every action returns directly out of the host function.
main := |*
IF %eof{ return command_if; };
IF ws => {return command_if; };
ELSE %eof{ return command_else;};
ELSE ws => { return command_else; };
ELSE ws+ IF %eof{ return command_else_if; };
ELSE ws+ IF ws => {return command_else_if; };
END %eof{ return command_end; };
END ws => {return command_end; };
EVALUATE %eof{ return command_evaluate; };
EVALUATE ws => {return command_evaluate; };
*|;
}%%
// Runs the classify scanner over `line`.
// Returns the command_* constant chosen by one of the scanner's actions,
// or 0 when none of them returns.
static int classify(const std::string &line) {
%%machine classify;
%% write data;
int cs;
int act;
const unsigned char *p = (const unsigned char *)line.data();
const unsigned char *pe = (const unsigned char *)line.data() + line.size();
// the whole line is available, so end-of-input is the end of the buffer.
const unsigned char *eof = pe;
const unsigned char *te, *ts;
%%write init;
%%write exec;
// no keyword action returned.
return 0;
}
/*
* this state machine splits input into lines.
* only new-line escapes are removed.
* "", '', and {} are also matched.
*
*/
/*
* from experimentation, mpw splits on ; after variable expansion;
* this splits before. something stupid like:
* set q '"'; echo {q} ; "
* will not be handled correctly. oh well.
* (should probably just drop that and we can then combine tokenizing w/
* variable expansion)
*/
%%{
machine line_parser;
alphtype unsigned char;
escape = 0xb6;
ws = [ \t];
nl = ('\n' | '\r');
# finish the current command: trim trailing whitespace, append it to
# `program` when non-empty, then restart at main.
action add_line {
/* strip trailing ws */
/* NOTE(review): scratch.back() is a char; confirm isspace is safe here for bytes >= 0x80. */
while (!scratch.empty() && isspace(scratch.back())) scratch.pop_back();
if (!scratch.empty()) {
command_ptr cmd = std::make_shared<command>(std::move(scratch));
cmd->line = start_line;
start_line = line;
program.emplace_back(std::move(cmd));
}
scratch.clear();
fgoto main;
}
# copy the current character into the command text.
action push_back {
scratch.push_back(fc);
}
# copy the escape character and the character that follows it.
action push_back_escape {
scratch.push_back(escape);
scratch.push_back(fc);
}
# '#' up to (not including) the end of the line; nothing is copied.
comment = '#' (any-nl)*;
# escape + newline is dropped (only the line counter advances);
# escape + any other character is kept as-is.
escape_seq =
escape
(
nl ${ /* esc newline */ line++; }
|
(any-nl) $push_back_escape
)
;
# single-quoted string. only escape \n is special.
# handling is so stupid I'm not going to support it.
sstring =
['] $push_back
( (any-nl-[']) $push_back )*
['] $push_back
$err{
throw std::runtime_error("### MPW Shell - 's must occur in pairs.");
}
;
# same quoting logic as ' string
vstring =
'{' $push_back
( (any-nl-'}') $push_back )*
'}' $push_back
$err{
throw std::runtime_error("### MPW Shell - {s must occur in pairs.");
}
;
# double-quoted string.
# escape \n is ignored. others do nothing.
dstring =
["] $push_back
(
escape_seq
|
vstring
|
(any-escape-nl-["{]) $push_back
)* ["] $push_back
$err{
throw std::runtime_error("### MPW Shell - \"s must occur in pairs.");
}
;
# this is a mess ...
# a run of whitespace (and escaped newlines) becomes one space, pushed when
# the following character is seen; that character is then re-read (fhold).
coalesce_ws =
ws
(
ws
|
escape nl ${ line++; }
)*
<:
any ${ scratch.push_back(' '); fhold; }
;
# one command: ends at an unquoted ';' or at a newline.
line :=
(
sstring
|
dstring
|
vstring
|
[;] $add_line
|
escape_seq
|
coalesce_ws
|
(any-escape-nl-ws-[;#'"{]) $push_back
)*
comment?
nl ${ line++; } $add_line
;
main :=
# strip leading whitespace.
ws*
<: # left guard -- higher priority to ws.
any ${ fhold; fgoto line; }
;
}%%
// Incremental reader: feed input with process(), then call finish() to get
// the linked program.
class line_parser {
public:
// feed a chunk of input; more may follow.
void process(const void *data, size_t size) {
process((const unsigned char *)data, size, false);
}
// feeds two trailing newlines as the final chunk, then builds the program.
command_ptr finish() {
process((const unsigned char *)"\n\n", 2, true);
return build_program();
}
line_parser();
private:
%% machine line_parser;
%% write data;
// commands collected so far by the add_line action.
std::vector<command_ptr> program;
// text of the command currently being read.
std::string scratch;
// current line number; starts at 1.
int line = 1;
// Ragel current state, kept between process() calls.
int cs;
command_ptr build_program();
void process(const unsigned char *data, size_t size, bool final);
};
// Initialises the state machine (Ragel `write init`).
line_parser::line_parser() {
%% machine line_parser;
%% write init;
}
// Runs the line_parser machine over one chunk of input.
// `final` marks the last chunk: eof is then set to the end of the buffer.
// Throws std::runtime_error if the machine ends in its error state, or if
// the final chunk leaves it anywhere other than the start state.
void line_parser::process(const unsigned char *data, size_t size, bool final) {
// line number stored on the next command created by the add_line action.
// NOTE(review): reset on every call, so a command continued from an earlier
// chunk gets the line at which this chunk starts -- confirm that is intended.
int start_line;
const unsigned char *p = data;
const unsigned char *pe = data + size;
const unsigned char *eof = nullptr;
if (final)
eof = pe;
start_line = line;
%% machine line_parser;
%% write exec;
if (cs == line_parser_error) {
throw std::runtime_error("MPW Shell - Lexer error.");
}
if (cs != line_parser_start && final) {
// will this happen?
throw std::runtime_error("MPW Shell - Lexer error.");
}
}
/*
 * Links the collected commands into a list through `next`, classifies each
 * one and records its nesting level. if / else / else if / end commands are
 * additionally chained through `alternate` (if -> else -> end) to make
 * executing them easier.
 *
 * Returns the first command, or an empty pointer when there are none.
 * Throws std::runtime_error for an else or end outside of an if, and for an
 * if that is never ended.
 */
// todo -- use recursive descent parser, support begin/end, (), ||, &&, etc.
command_ptr line_parser::build_program() {
	if (program.empty()) return command_ptr();

	std::vector<command_ptr> open_ifs;
	command_ptr current;

	// reversed so commands can be taken off the back in source order.
	std::reverse(program.begin(), program.end());
	command_ptr first = program.back();

	while (!program.empty()) {
		if (current) current->next = program.back();
		current = std::move(program.back());
		program.pop_back();

		int kind = current->type = classify(current->string);
		current->level = open_ifs.size();

		// if stack...
		if (kind == command_if) {
			open_ifs.push_back(current);
		} else if (kind == command_else || kind == command_else_if) {
			if (open_ifs.empty()) {
				throw std::runtime_error("### MPW Shell - Else must be within if ... end.");
			}
			// an else sits at the level of the if it belongs to.
			current->level--;
			open_ifs.back()->alternate = current;
			open_ifs.back() = current;
		} else if (kind == command_end) {
			if (open_ifs.empty()) {
				throw std::runtime_error("### MPW Shell - Extra end command.");
			}
			current->level--;
			open_ifs.back()->alternate = current;
			open_ifs.pop_back();
		}
	}

	if (!open_ifs.empty()) {
		throw std::runtime_error("### MPW Shell - Unterminated if command.");
	}
	return first;
}
// Reads fd until end-of-file, feeding every chunk to a line_parser, and
// returns the parsed program.
// Throws std::runtime_error if read() fails.
command_ptr read_fd(int fd) {
	unsigned char buffer[1024];
	line_parser p;

	for (;;) {
		ssize_t s = read(fd, buffer, sizeof(buffer));
		if (s < 0) {
			throw std::runtime_error("MPW Shell - Read error.");
		}
		// read() returns 0 at end-of-file; without this check the loop never
		// ends and finish() is never reached.
		if (s == 0) break;
		p.process(buffer, s);
	}
	return p.finish();
}
// Opens `name` read-only and parses its contents with read_fd().
// Throws std::runtime_error if the file cannot be opened; exceptions from
// read_fd() propagate after the descriptor has been closed.
command_ptr read_file(const std::string &name) {
	const int fd = open(name.c_str(), O_RDONLY);
	if (fd < 0) {
		throw std::runtime_error("MPW Shell - Unable to open file " + name + ".");
	}

	command_ptr program;
	try {
		program = read_fd(fd);
	} catch (...) {
		// don't leak the descriptor when reading or parsing fails.
		close(fd);
		throw;
	}
	close(fd);
	return program;
}
// Parses the contents of s as a complete script.
command_ptr read_string(const std::string &s) {
	line_parser parser;

	const void *bytes = s.data();
	parser.process(bytes, s.size());

	return parser.finish();
}

258
mpw-shell-token.rl Normal file
View File

@ -0,0 +1,258 @@
#include <string>
#include <vector>
#include <stdio.h>
#include "mpw-shell.h"
%%{
# Splits one command into tokens. Text is accumulated in `scratch` and
# flushed to `tokens` by push_token; operators are appended as their own
# tokens. Patterns guarded with `when eval` only apply when `eval` is true.
machine tokenizer;
alphtype unsigned char;
escape = 0xb6;
ws = [ \t];
nl = '\n' | '\r';
# flush the accumulated text (if any) as a text token.
action push_token {
if (!scratch.empty()) {
tokens.emplace_back(std::move(scratch));
scratch.clear();
}
}
# append the current character to the accumulated text.
action push_back {
scratch.push_back(fc);
}
# vstring_quoted =
# [{]
# ( (any-nl-[}]) ${ var.push_back(fc); } )*
# [}]
# %{
# auto iter = Environment.find(var);
# if (iter != Environment.end() {
# scratch.append(iter->second);
# })
# var.clear();
# }
# $err{
# throw std::runtime_error("### MPW Shell - '{ must occur in pairs.");
# }
# ;
# vstring_unqoted =
# [{]
# ( (any-nl-[}]) ${ var.push_back(fc); } )*
# [}]
# %{
# auto iter = Environment.find(var);
# if (iter != Environment.end() {
# // re-parse. ", ', { are not
# // special. all others are treated normally.
# })
# var.clear();
# }
# $err{
# throw std::runtime_error("### MPW Shell - '{ must occur in pairs.");
# }
# ;
# single-quoted string: the quotes are dropped, the contents copied verbatim.
sstring =
[']
( (any-nl-[']) $push_back )*
[']
$err{
throw std::runtime_error("### MPW Shell - 's must occur in pairs.");
}
;
# escape + f / n / t gives form feed / newline / tab;
# escape + anything else gives that character.
escape_seq =
escape
(
'f' ${scratch.push_back('\f'); }
|
'n' ${scratch.push_back('\n'); /* \r ? */ }
|
't' ${scratch.push_back('\t'); }
|
any-[fnt] $push_back
)
;
# double-quoted string.
dstring =
["]
(
escape_seq
|
(any-escape-["]) $push_back
)*
["]
$err{
throw std::runtime_error("### MPW Shell - \"s must occur in pairs.");
}
;
action eval { eval }
# > == start state (single char tokens or common prefix)
# % == final state (multi char tokens w/ unique prefix)
# $ == all states
main := |*
ws+ >push_token;
'>>' %push_token => { tokens.emplace_back(">>", '>>'); };
'>' %push_token => { tokens.emplace_back(">", '>'); };
'<' %push_token => { tokens.emplace_back("<", '<'); };
'||' %push_token => { tokens.emplace_back("||", '||'); };
'|' %push_token => { tokens.emplace_back("|", '|'); };
'&&'
%push_token => { tokens.emplace_back("&&", '&&'); };
# eval-only.
'(' when eval
%push_token => { tokens.emplace_back("(", '('); };
')' when eval
%push_token => { tokens.emplace_back(")", ')'); };
'<<' when eval
%push_token => { tokens.emplace_back("<<", '<<'); };
'<=' when eval
%push_token => { tokens.emplace_back("<=", '<='); };
'>=' when eval
%push_token => { tokens.emplace_back(">=", '>='); };
'==' when eval
%push_token => { tokens.emplace_back("==", '=='); };
'!=' when eval
%push_token => { tokens.emplace_back("!=", '!='); };
'&' when eval
%push_token => { tokens.emplace_back("&", '&'); };
'+' when eval
>push_token => { tokens.emplace_back("+", '+'); };
'*' when eval
%push_token => { tokens.emplace_back("*", '*'); };
'%' when eval
%push_token => { tokens.emplace_back("%", '%'); };
# the token text is "-" (it was "+", which did not match its type).
'-' when eval
%push_token => { tokens.emplace_back("-", '-'); };
'!' when eval
%push_token => { tokens.emplace_back("!", '!'); };
'^' when eval
%push_token => { tokens.emplace_back("^", '^'); };
'~' when eval
%push_token => { tokens.emplace_back("~", '~'); };
'=' when eval
%push_token => { tokens.emplace_back("=", '='); };
'+=' when eval
%push_token => { tokens.emplace_back("+=", '+='); };
'-=' when eval
%push_token => { tokens.emplace_back("-=", '-='); };
sstring ;
dstring ;
escape_seq;
(any-escape-['"]) => push_back; # { scratch.append(ts, te); };
#(any-escape-ws-[>'"])+ => { scratch.append(ts, te); };
*|
;
}%%
// Retypes a token whose string matches one of the keyword operators
// (case-insensitive): and -> '&&', or -> '||', not -> '!', div -> '/',
// mod -> '%'. Only t.type is assigned; t.string is left as it was.
inline void replace_eval_token(token &t) {
%%{
machine eval_keywords;
main :=
/and/i %{ t.type = '&&'; }
|
/or/i %{ t.type = '||'; }
|
/not/i %{ t.type = '!'; }
|
/div/i %{ t.type = '/'; }
|
/mod/i %{ t.type = '%'; }
;
}%%
%%machine eval_keywords;
%%write data;
const char *p = t.string.data();
const char *pe = t.string.data() + t.string.size();
// the whole string is available, so end-of-input is the end of the buffer.
const char *eof = pe;
int cs;
%%write init;
%%write exec;
}
// Splits s into tokens using the tokenizer machine.
// `eval` enables the eval-only operator patterns and, afterwards, the
// keyword replacement (replace_eval_token) on text tokens.
// Throws std::runtime_error if the machine ends in its error state.
std::vector<token> tokenize(const std::string &s, bool eval)
{
std::vector<token> tokens;
// text accumulated for the token currently being read.
std::string scratch;
%%machine tokenizer;
%% write data;
int cs, act;
unsigned const char *p = (const unsigned char *)s.data();
unsigned const char *pe = (const unsigned char *)s.data() + s.size();
unsigned const char *eof = pe;
unsigned const char *ts, *te;
%%write init;
%%write exec;
if (cs == tokenizer_error) {
throw std::runtime_error("MPW Shell - Lexer error.");
}
// flush any text still pending at the end of the input.
if (!scratch.empty()) {
tokens.emplace_back(std::move(scratch));
scratch.clear();
}
// alternate operator tokens for eval
if (eval) {
for (token & t : tokens) {
if (t.type == token::text) replace_eval_token(t);
}
}
return tokens;
}

50
mpw-shell.cpp Normal file
View File

@ -0,0 +1,50 @@
#include <vector>
#include <string>
#include <unordered_map>
#include <unistd.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include "mpw-shell.h"
// The shell's variables, keyed by name (declared extern in mpw-shell.h).
std::unordered_map<std::string, EnvironmentEntry> Environment;
// should set {MPW}, {MPWVersion}, then execute {MPW}StartUp
void init(void) {
Environment.emplace("status", std::string("0"));
Environment.emplace("exit", std::string("1")); // terminate script on error.
}
// Reads a script from file descriptor 0, executes it, and exits with the
// status execute() returns. A std::exception from either step is printed to
// stderr and the process exits with status 1.
int main(int argc, char **argv) {
	init();

	try {
		command_ptr head = read_fd(0);
		exit(execute(head));
	} catch (std::exception &ex) {
		fprintf(stderr, "%s\n", ex.what());
		exit(1);
	}
}

159
mpw-shell.h Normal file
View File

@ -0,0 +1,159 @@
#ifndef __mpw_shell_h__
#define __mpw_shell_h__
#include <string>
#include <vector>
#include <unordered_map>
#include <memory>
#include <cstdint>
class command;
// shared-ownership handle to a command.
typedef std::shared_ptr<command> command_ptr;
// non-owning handle to a command.
typedef std::weak_ptr<command> weak_command_ptr;
// the shell escape character as a byte value; the Ragel machines in the .rl
// files define their own `escape` with the same value.
const unsigned char escape = 0xb6;
// One environment variable: its string value plus a flag saying whether it
// is exported. Converts implicitly to either part.
struct EnvironmentEntry {
public:
	// construction / destruction
	EnvironmentEntry() = default;
	EnvironmentEntry(const EnvironmentEntry &) = default;
	EnvironmentEntry(EnvironmentEntry &&) = default;
	EnvironmentEntry(const std::string &s, bool e = false)
		: value(s), exported(e) {}
	EnvironmentEntry(std::string &&s, bool e = false)
		: value(std::move(s)), exported(e) {}
	~EnvironmentEntry() = default;

	// assignment: whole entry, exported flag only, or value only.
	EnvironmentEntry &operator=(const EnvironmentEntry &) = default;
	EnvironmentEntry &operator=(EnvironmentEntry &&) = default;
	EnvironmentEntry &operator=(bool &rhs) {
		exported = rhs;
		return *this;
	}
	EnvironmentEntry &operator=(const std::string &rhs) {
		value = rhs;
		return *this;
	}

	// access to the exported flag.
	operator bool() const { return exported; }
	operator bool &() { return exported; }

	// access to the value.
	operator const std::string &() const { return value; }
	operator std::string &() { return value; }

private:
	std::string value;
	bool exported = false;
};
// the shell's variables, keyed by name; defined in mpw-shell.cpp.
extern std::unordered_map<std::string, EnvironmentEntry> Environment;
// Command types stored in command::type; 0 is used for every other command.
enum {
	command_if = 1,
	command_else = 2,
	command_else_if = 3,
	command_end = 4,
	command_begin = 5,
	command_evaluate = 6,
};
// One command of a script: its text, type, source line and nesting level,
// plus links to the following command and to its if/else/end partner.
class command {
public:
	unsigned type = 0;
	unsigned line = 0;
	unsigned level = 0;
	std::string string;
	command_ptr next;
	weak_command_ptr alternate; // if -> else -> end. weak to prevent cycles.

	command() = default;
	command(const command &) = default;
	command(command &&) = default;

	// from text only; type stays 0.
	command(const std::string &s) : string(s) {}
	command(std::string &&s) : string(std::move(s)) {}

	// from type and text.
	command(unsigned t, const std::string &s) : type(t), string(s) {}
	command(unsigned t, std::string &&s) : type(t), string(std::move(s)) {}
};
// A token: its text plus a type code. The type is one of the enumerators
// below or a character constant.
class token {
public:
	enum {
		text = 0,
		eof = 1,
		equivalent = 2,
		not_equivalent = 3,
		// remainder are characters.
	};

	unsigned type = text;
	std::string string;

	token() = default;
	token(const token &) = default;
	token(token &&) = default;
	token(const std::string &s, unsigned t = text) : type(t), string(s) {}
	token(std::string &&s, unsigned t = text) : type(t), string(std::move(s)) {}

	token &operator=(const token &) = default;
	token &operator=(token &&) = default;

	// a token converts to (a copy of) its text.
	operator std::string() const { return string; }
};
// script readers: each returns the first command of the parsed program.
command_ptr read_fd(int fd);
command_ptr read_file(const std::string &);
command_ptr read_string(const std::string &);
// splits a command into tokens; eval enables the expression operators.
std::vector<token> tokenize(const std::string &s, bool eval = false);
std::string expand_vars(const std::string &s, const std::unordered_map<std::string, EnvironmentEntry> &env);
//std::string quote(std::string &&s);
// returns s, wrapped in single quotes when it contains characters that need quoting.
std::string quote(const std::string &s);
struct process;
struct value;
class fdmask;
void parse_tokens(std::vector<token> &&tokens, process &p);
int execute(command_ptr cmd);
// evaluates tokens as an expression; name prefixes the error messages.
int32_t evaluate_expression(const std::string &name, std::vector<token> &&tokens);
// builtin commands.
int builtin_directory(const std::vector<std::string> &, const fdmask &);
int builtin_echo(const std::vector<std::string> &, const fdmask &);
int builtin_parameters(const std::vector<std::string> &, const fdmask &);
int builtin_quote(const std::vector<std::string> &tokens, const fdmask &);
int builtin_set(const std::vector<std::string> &, const fdmask &);
int builtin_unset(const std::vector<std::string> &, const fdmask &);
int builtin_export(const std::vector<std::string> &, const fdmask &);
int builtin_unexport(const std::vector<std::string> &, const fdmask &);
int builtin_evaluate(std::vector<token> &&, const fdmask &);
#endif

21
mpw-shell.text Normal file
View File

@ -0,0 +1,21 @@
parser differences.
I've tried to follow MPW's command-line parsing algorithm, but there are some differences.
Mostly this is because, in MPW,
set q '"' ; echo {q} really "
is equivalent to
echo " really "
mpw removes # comments before shell expansion so this _is_ an error:
echo {q} # "
- mpw doesn't split on ; until after variables are expanded. I split before variable expansion.
- escape new-line is not allowed in a '' or {} string.
- quote matching happens when the line is read.

73
value.h Normal file
View File

@ -0,0 +1,73 @@
#ifndef __value_h__
#define __value_h__
#include <string>
#include <cstdint>
// hold a string and number value.
// The numeric form of a string is worked out lazily: `status` starts as
// unknown and is settled by scan_number() the first time a number is asked for.
struct value {
public:
	std::string string;
	int32_t number = 0;

	// empty token treated as 0.
	value() : status(valid) {}
	value(const value &) = default;
	value(value &&) = default;

	// from a number: the string is its decimal form.
	value(int32_t n)
		: string(std::to_string(n)), number(n), status(valid) {}

	// from text: not scanned yet.
	value(const std::string &s) : string(s) {}
	value(std::string &&s) : string(std::move(s)) {}

	value &operator=(const value &) = default;
	value &operator=(value &&) = default;

	// numeric value; calls expect_number() (which does not return) when
	// the string is not a number.
	int32_t to_number() {
		if (status == unknown) scan_number();
		if (status != valid) expect_number();
		return number;
	}

	// numeric value, or default_value when the string is not a number.
	int32_t to_number(int default_value) noexcept {
		if (status == unknown) scan_number();
		return status == valid ? number : default_value;
	}

	bool is_number() noexcept {
		if (status == unknown) scan_number();
		return status == valid;
	}

private:
	[[noreturn]] void expect_number() const;
	void scan_number() noexcept;

	mutable enum {
		unknown,
		valid,
		invalid
	} status = unknown;
};
#endif

76
value.rl Normal file
View File

@ -0,0 +1,76 @@
#include "value.h"
#include <stdexcept>
// Throws std::domain_error naming the string that was found where a number
// was required.
void value::expect_number() const {
	throw std::domain_error("Expected a number when \"" + string + "\" was encountered");
}
// Tries to read `string` as a number and records the outcome in `status`.
// On a match, `number` is set and status becomes valid; otherwise status
// becomes invalid. An empty string counts as the number 0.
void value::scan_number(void) noexcept {
%%{
machine scanner;
# hexadecimal: '$', '0x' or '0X' followed by one or more hex digits.
hexnumber =
('$' | '0x' | '0X')
(
[0-9] ${ value = (value << 4) + fc - '0'; }
|
[A-Fa-f] ${value = (value << 4) + (fc | 0x20) - 'a' + 10; }
)+
;
# binary: '0b' or '0B' followed by one or more binary digits.
binnumber =
('0b' | '0B')
[01]+ ${ value = (value << 1) + fc - '0'; }
;
# NOTE(review): octalnumber is defined but is not one of the alternatives in main below.
octalnumber =
'0'
[0-7]+ ${ value = (value << 3) + fc - '0'; }
;
# a leading 0 is ambiguous since it could also
# be part of the binary or hex prefix.
# however, setting it to 0 is safe.
decnumber =
'0'
|
([1-9] [0-9]*) ${ value = value * 10 + fc - '0'; }
;
# on a match the leaving action stores the result and returns from the function.
main :=
( hexnumber | decnumber |binnumber)
%{
status = valid;
number = value;
return;
}
;
}%%
if (string.empty()) {
// special case.
status = valid;
number = 0;
return;
}
const char *p = string.data();
const char *pe = string.data() + string.size();
const char *eof = pe;
int cs;
// accumulator used by the machine's actions (distinct from struct value).
int32_t value = 0;
%%write data;
%%write init;
%%write exec;
// the machine did not return above: not a number.
status = invalid;
}