From fed90b3753aa80268486f682ccd7e446cce69f39 Mon Sep 17 00:00:00 2001
From: Kelvin Sherlock
Date: Thu, 21 Jul 2016 11:46:39 -0400
Subject: [PATCH] rewrite phase1 in C -- slightly strange processing is ugly in ragel. Also move error checking for strings/variables until later.

---
Notes for whoever applies this (text in this section is ignored by git-am):
 * phase1.cpp: the bare "#include" now names <cassert>, which assert()
   needs; <utility> is included for std::move; and process() returns
   st_text after the "unknown state" assert so that an NDEBUG build
   cannot run off the end of a non-void function.  Unreachable break
   statements after return/goto were dropped and comments were added.
   Scanning behaviour for every known state is unchanged.
 * The phase1.cpp line counts in the diffstat and hunk header were
   updated to match; the blob id on its index line was not regenerated.
 * This patch was restored from a copy with collapsed whitespace.
   Indentation and the positions of blank context lines in the
   CMakeLists.txt and phase1.h hunks are a best guess chosen to satisfy
   the hunk counts, and must be checked against the tree.

 CMakeLists.txt |   2 +-
 phase1.cpp     | 204 +++++++++++++++++++++++++++++++++++++++++++++++++
 phase1.h       |  13 ++--
 3 files changed, 214 insertions(+), 5 deletions(-)
 create mode 100644 phase1.cpp

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1a3940e..62fa92d 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -54,7 +54,7 @@ macro(RAGEL_TARGET Name Input Output)
 endmacro()
 
 
-RAGEL_TARGET(phase1 phase1.rl phase1.cpp COMPILE_FLAGS "-p -G2")
+#RAGEL_TARGET(phase1 phase1.rl phase1.cpp COMPILE_FLAGS "-p -G2")
 RAGEL_TARGET(phase2 phase2.rl phase2.cpp COMPILE_FLAGS "-p -G2")
 RAGEL_TARGET(pathnames pathnames.rl pathnames.cpp COMPILE_FLAGS "-p -G2")
 RAGEL_TARGET(mpw-shell-token mpw-shell-token.rl mpw-shell-token.cpp COMPILE_FLAGS "-p -G2")
diff --git a/phase1.cpp b/phase1.cpp
new file mode 100644
index 0000000..2af7e09
--- /dev/null
+++ b/phase1.cpp
@@ -0,0 +1,204 @@
+#include "phase1.h"
+#include <cassert>
+#include <utility>
+
+/*
+ * Scanner states.  Each "_esc" state is declared immediately after the
+ * state it interrupts, because process() returns to that state with
+ * "st - 1".  Keep that ordering when adding or moving states.
+ */
+enum {
+	st_text,
+	st_text_esc,
+
+	st_comment,
+
+	st_vstring,
+	st_vstring_esc,
+
+	st_dstring,
+	st_dstring_esc,
+
+	st_sstring,
+	st_sstring_esc,
+
+	st_estring,
+	st_estring1,
+	st_estring1_esc,
+
+	st_estring2,
+	st_estring2_esc,
+
+	st_estring3,
+};
+
+
+/*
+ * Advance the scanner by one character.
+ *
+ * c  - the next input character.
+ * st - the current state (one of the st_* values above).
+ *
+ * Returns the state to use for the following character.  Characters
+ * outside of comments are appended to scratch.  An end of line flushes
+ * scratch, unless the previous character was the escape character, in
+ * which case the escape is dropped and the line continues.  Malformed
+ * strings/variables are not reported here.
+ */
+int phase1::process(unsigned char c, int st) {
+
+	const unsigned char esc = 0xb6;
+
+
+	if (c == '\r' || c == '\n') {
+		switch (st) {
+		case st_text:
+		case st_comment:
+		default: // will error later.
+			flush();
+			multiline = false;
+			line++;
+			return st_text;
+
+		case st_text_esc:
+		case st_vstring_esc:
+		case st_dstring_esc:
+		case st_sstring_esc:
+		case st_estring1_esc:
+		case st_estring2_esc:
+			// escaped end of line: drop the escape character pushed on
+			// the previous call and resume the state it interrupted.
+			multiline = true;
+			scratch.pop_back();
+			line++;
+			return st - 1;
+		}
+	}
+
+	// comment text is discarded; everything else is kept.
+	if (st != st_comment) scratch.push_back(c);
+
+	switch(st) {
+
+	case st_text:
+text:
+		switch(c) {
+		case '#':
+			// the '#' is not kept either.
+			scratch.pop_back();
+			return st_comment;
+		case esc:
+			return st_text_esc;
+		case '{':
+			return st_vstring;
+		case '"':
+			return st_dstring;
+		case '\'':
+			return st_sstring;
+		case '`':
+			return st_estring;
+
+		default:
+			return st_text;
+		}
+
+	case st_comment:
+		return st;
+
+	case st_text_esc:
+	case st_dstring_esc:
+	case st_estring1_esc:
+	case st_estring2_esc:
+		// the escaped character has been stored; back to the base state.
+		return st-1;
+
+
+	case st_sstring_esc:
+		// fall through
+	case st_sstring:
+		if (c == '\'') return st_text;
+		if (c == esc) return st_sstring_esc;
+		return st_sstring;
+
+	case st_dstring:
+		if (c == '\"') return st_text;
+		if (c == esc) return st_dstring_esc;
+		return st_dstring;
+
+	case st_vstring_esc:
+		// fall through
+	case st_vstring:
+		// '{' var '}' or '{{' var '}}'
+		// don't care if {{ or { at this point. A single } terminates.
+		if (c == '}') return st_text;
+		if (c == esc) return st_vstring_esc;
+		return st_vstring;
+
+	case st_estring:
+		// ``...`` or `...`
+		if (c == '`') return st_estring2;
+		// fall through.
+	case st_estring1:
+		if (c == '`') return st_text;
+		if (c == esc) return st_estring1_esc;
+		return st_estring1;
+
+	case st_estring2:
+		if (c == '`') return st_estring3;
+		if (c == esc) return st_estring2_esc;
+		return st_estring2;
+
+	case st_estring3:
+		if (c == '`') return st_text;
+		// error! handled later.
+		goto text;
+
+	}
+	assert(!"unknown state");
+	// assert() compiles away under NDEBUG; never fall off the end of a
+	// non-void function.
+	return st_text;
+}
+
+
+/* Scan every character of s; call finish() afterwards when final is set. */
+void phase1::process(const std::string &s, bool final) {
+
+	for (unsigned char c : s) {
+		cs = process(c, cs);
+	}
+	if (final) finish();
+}
+
+/* Scan the bytes in [begin, end); call finish() afterwards when final is set. */
+void phase1::process(const unsigned char *begin, const unsigned char *end, bool final) {
+	while (begin != end) {
+		cs = process(*begin++, cs);
+	}
+	if (final) finish();
+}
+
+/* End of input: terminate any pending line and flush what is left. */
+void phase1::finish() {
+
+	cs = process('\n', cs);
+	flush();
+}
+
+/* Return to the initial state, discarding any partially scanned line. */
+void phase1::reset() {
+	cs = st_text;
+	multiline = false;
+	line = 1;
+	scratch.clear();
+}
+
+/* Pass the accumulated line, if any, to pipe_to and empty the buffer. */
+void phase1::flush() {
+	multiline = false;
+	if (scratch.empty()) return;
+	// strip trailing whitespace?
+	if (pipe_to) pipe_to(std::move(scratch));
+	// a moved-from string is in an unspecified state; make it empty.
+	scratch.clear();
+}
diff --git a/phase1.h b/phase1.h
index b9ffbe3..e1995d4 100644
--- a/phase1.h
+++ b/phase1.h
@@ -10,7 +10,7 @@ class phase1 {
 public:
 	typedef std::function pipe_function;
 
-	phase1();
+	phase1() = default;
 
 	void process(const unsigned char *begin, const unsigned char *end, bool final = false);
 
@@ -18,11 +18,12 @@ public:
 		process((const unsigned char *)begin, (const unsigned char *)end, final);
 	}
 
-	void process(const std::string &s, bool final = false) { process(s.data(), s.data() + s.size(), final); }
+	void process(const std::string &s, bool final = false);// { process(s.data(), s.data() + s.size(), final); }
 
-	void finish() { const char *tmp = "\n"; process(tmp, tmp+1, true); }
+	void finish();// { const char *tmp = "\n"; process(tmp, tmp+1, true); }
 
 	void reset();
+	void abort() { reset(); }
 
 	phase1 &operator >>= (pipe_function f) { pipe_to = f; return *this; }
 
@@ -35,14 +36,18 @@ public:
 		return *this;
 	}
 
-	void abort() { reset(); }
 
 
 private:
+
+	int process(unsigned char, int);
+	void flush();
+
 	std::string scratch;
 	pipe_function pipe_to;
 	int line = 1;
 	int cs = 0;
+	bool multiline = false;
 };
 
 #endif