From 7691267dea8637e69bff5601c75e492cc181c4f0 Mon Sep 17 00:00:00 2001 From: Kelvin Sherlock Date: Mon, 29 Dec 2014 00:12:47 -0500 Subject: [PATCH] template lecture and parser code --- bin/CMakeLists.txt | 37 ++++- bin/template.h | 45 ++++++ bin/template_loader.rl | 288 ++++++++++++++++++++++++++++++++++++++ bin/template_parser.lemon | 114 +++++++++++++++ 4 files changed, 477 insertions(+), 7 deletions(-) create mode 100644 bin/template.h create mode 100644 bin/template_loader.rl create mode 100644 bin/template_parser.lemon diff --git a/bin/CMakeLists.txt b/bin/CMakeLists.txt index 41ea2ff..9f2dd5b 100644 --- a/bin/CMakeLists.txt +++ b/bin/CMakeLists.txt @@ -20,7 +20,7 @@ add_custom_command( MAIN_DEPENDENCY lexer.rl ) -add_custom_command( +add_custom_command( OUTPUT parser.cpp parser.h COMMAND cp -f "${CMAKE_CURRENT_SOURCE_DIR}/parser.lemon" "parser.lemon" COMMAND lemon parser.lemon @@ -31,6 +31,20 @@ add_custom_command( DEPENDS debugger.h ) + +add_custom_command( + OUTPUT template_parser.cpp template_parser.h + COMMAND cp -f "${CMAKE_CURRENT_SOURCE_DIR}/template_parser.lemon" "template_parser.lemon" + COMMAND lemon template_parser.lemon + COMMAND cp -f template_parser.h "${CMAKE_CURRENT_SOURCE_DIR}/" + COMMAND cp -f template_parser.out "${CMAKE_CURRENT_SOURCE_DIR}/" + COMMAND mv -f template_parser.c template_parser.cpp + MAIN_DEPENDENCY template_parser.lemon + DEPENDS debugger.h +) + + + add_custom_command( OUTPUT loadtrap.cpp COMMAND ragel -p -G2 -o loadtrap.cpp "${CMAKE_CURRENT_SOURCE_DIR}/loadtrap.rl" @@ -39,16 +53,25 @@ add_custom_command( ) -set_source_files_properties( - loadtrap.cpp lexer.cpp - PROPERTIES - COMPILE_FLAGS - "${CMAKE_CXX_FLAGS} -Wno-unused-variable" +add_custom_command( + OUTPUT template_loader.cpp + COMMAND ragel -p -G2 -o template_loader.cpp "${CMAKE_CURRENT_SOURCE_DIR}/template_loader.rl" + MAIN_DEPENDENCY template_loader.rl + DEPENDS debugger.h template_parser.h ) +set_source_files_properties( + loadtrap.cpp lexer.cpp template_loader.cpp 
#ifndef __debug_template_h__
#define __debug_template_h__

#include <string>
#include <unordered_map>

namespace Debug {

    // Type codes for the named built-in types recognised by the template
    // lexer.  The lexer also passes plain characters ('c', 'S', 'L', ...) as
    // type codes for the sized integer types.
    enum {
        kUnknown = 0,
        kStringPtr,     // p-string
        kCStringPtr,    // c-string
        kPtr,
        kOSType,        // four-cc
        kBoolean,       // unsigned char, display true/false
    };


    struct Template;
    struct FieldEntry;

    // One field of a struct template.  Fields are chained through `next`;
    // the grammar links each new field in front of the previous ones.
    struct FieldEntry {
        std::string *name;  // interned field name
        unsigned type;      // type code; 0 when the field is described by `tmpl`
        unsigned count;     // array count; the grammar stores -1 when there is none
        Template *tmpl;     // template of a struct-typed field, otherwise null
        FieldEntry *next;
    };

    struct Template {
        unsigned type; // 0 for structs, < 256 for types.

        FieldEntry *firstField;
        unsigned size;
    };

    // Called by the grammar when a typedef / struct definition is reduced.
    void CreateTypedef(const std::string *name, int type);
    void CreateTemplate(const std::string *name, FieldEntry *firstField);

    // Lexes and parses a template file (defined in template_loader.rl).
    // Returns false if the file cannot be read or contains an illegal character.
    bool ParseTemplates(const std::string &filename);


    // Known templates, looked up by name from the lexer.
    extern std::unordered_map<std::string, Template *> Templates;
    // Current line of the template file being parsed (1-based).
    extern int TemplateLine;
}
#endif
namespace {

    // Value of one hexadecimal digit; any other character yields 0.
    int tox(char c)
    {
        c |= 0x20; // lowercase it (decimal digits already have this bit set).
        if (c >= '0' && c <= '9') return c - '0';
        if (c >= 'a' && c <= 'f') return c - 'a' + 10;
        return 0;
    }

    // Parses the decimal digits in [begin, end).  The caller guarantees the
    // range only holds digits; the result wraps modulo 2^32 on overflow.
    uint32_t scan10(const char *begin, const char *end)
    {
        // The seed must be a uint32_t: std::accumulate keeps its running
        // value in the type of the seed, and an int seed would squeeze every
        // partial result through a signed type.
        return std::accumulate(begin, end, uint32_t{0},
            [](uint32_t value, char c) -> uint32_t {
                return value * 10 + c - '0';
            });
    }

    // Parses the hexadecimal digits in [begin, end) (no "0x" prefix).
    uint32_t scan16(const char *begin, const char *end)
    {
        return std::accumulate(begin, end, uint32_t{0},
            [](uint32_t value, char c) -> uint32_t {
                return (value << 4) + tox(c);
            });
    }



    // DJB hash (hash * 33 + c, seeded with 5381) of the bytes in [begin, end).
    unsigned int DJBHash(const char* begin, const char *end)
    {
        unsigned int hash = 5381;

        for(const char *iter = begin; iter != end; ++iter)
        {
            hash = ((hash << 5) + hash) + (*iter);
        }

        return hash;
    }

    // Interned strings, keyed by their DJB hash.  A multimap because distinct
    // strings may share a hash.
    std::unordered_multimap<unsigned, std::string *> InternTable;

    // Returns the unique interned string equal to [begin, end), creating it
    // on first use.  Interned strings are never freed, so the returned
    // pointer can be compared by address and kept indefinitely.
    const std::string *InternString(const char *begin, const char *end)
    {
        const unsigned hash = DJBHash(begin, end);
        const size_t length = static_cast<size_t>(end - begin);

        const auto range = InternTable.equal_range(hash);
        for (auto iter = range.first; iter != range.second; ++iter)
        {
            // hash matches, make sure the string does.
            const std::string *s = iter->second;

            if (s->length() == length && std::memcmp(s->data(), begin, length) == 0)
                return s;
        }

        // insert it.
        std::string *s = new std::string(begin, end);
        InternTable.emplace(hash, s);
        return s;
    }

}
// Entry points generated by Lemon from template_parser.lemon.
void TemplateParse(void *yyp, int yymajor, void *yyminor);
void *TemplateParseAlloc(void *(*mallocProc)(size_t));
void TemplateParseFree(void *p, void (*freeProc)(void*));

// Feeds a token whose value is an integer.
//
// The parser only stores the pointer it is handed and dereferences it later,
// when the rule containing the token is reduced -- which happens during a
// subsequent call.  Passing the address of the by-value parameter would
// therefore leave the parser with a dangling pointer, so each distinct value
// is kept in a table instead.  unordered_map never moves its elements, so the
// address stays valid for the life of the program.
void TemplateParse(void *yyp, int yymajor, int yyminor)
{
	static std::unordered_map<int, int> values;

	int &stable = values.emplace(yyminor, yyminor).first->second;
	TemplateParse(yyp, yymajor, static_cast<void *>(&stable));
}

// Feeds a token whose value is an interned string.
void TemplateParse(void *yyp, int yymajor, const std::string *yyminor)
{
	TemplateParse(yyp, yymajor, static_cast<void *>(const_cast<std::string *>(yyminor)));
}

%%{
	machine lexer;

	# this exits with cs == lexer_en_error.
	error := any* ${ fbreak; };

	# a CR LF pair is one line break, not two; the scanner takes the
	# longest match, so '\r\n' wins over a lone '\r'.
	newline = '\r\n' | '\n' | '\r';

	block_comment := |*
		newline { TemplateLine++; };
		'*/' { fgoto main; };
		any ;
	*|;

	main := |*

		newline { TemplateLine++; };
		[ \t]+;

		'//' [^\r\n]* ;
		'/*' { fgoto block_comment; };

		';' { TemplateParse(parser, tkSEMI, 0); };
		'{' { TemplateParse(parser, tkLBRACE, 0); };
		'}' { TemplateParse(parser, tkRBRACE, 0); };
		'[' { TemplateParse(parser, tkLBRACKET, 0); };
		']' { TemplateParse(parser, tkRBRACKET, 0); };
		'*' { TemplateParse(parser, tkSTAR, 0); };

		'struct' { TemplateParse(parser, tkSTRUCT, 0); };
		'typedef' { TemplateParse(parser, tkTYPEDEF, 0); };


		'int' { TemplateParse(parser, tkINT, 0); };
		'long' { TemplateParse(parser, tkLONG, 0); };
		'short' { TemplateParse(parser, tkSHORT, 0); };
		'volatile' { TemplateParse(parser, tkVOLATILE, 0); };
		#'const' { TemplateParse(parser, tkCONST, 0); };
		'char' { TemplateParse(parser, tkCHAR, 0); };
		#'bool' { TemplateParse(parser, tkBOOL, 0); };
		'void' { TemplateParse(parser, tkVOID, 0); };

		'signed' { TemplateParse(parser, tkSIGNED, 0); };
		'unsigned' { TemplateParse(parser, tkUNSIGNED, 0); };

		'int64_t' { TemplateParse(parser, tkTYPECODE, 'q'); };
		'uint64_t' { TemplateParse(parser, tkTYPECODE, 'Q'); };

		'int32_t' { TemplateParse(parser, tkTYPECODE, 'l'); };
		'uint32_t' { TemplateParse(parser, tkTYPECODE, 'L'); };

		'int16_t' { TemplateParse(parser, tkTYPECODE, 's'); };
		'uint16_t' { TemplateParse(parser, tkTYPECODE, 'S'); };

		'int8_t' { TemplateParse(parser, tkTYPECODE, 'c'); };
		'uint8_t' { TemplateParse(parser, tkTYPECODE, 'C'); };


		'StringPtr' { TemplateParse(parser, tkTYPECODE, kStringPtr); };
		'CStringPtr' { TemplateParse(parser, tkTYPECODE, kCStringPtr); };
		'Ptr' { TemplateParse(parser, tkTYPECODE, kPtr); };
		'OSType' { TemplateParse(parser, tkTYPECODE, kOSType); };
		'Boolean' { TemplateParse(parser, tkTYPECODE, kBoolean); };


		# numbers. negative numbers are not allowed.

		'0x'i xdigit+ {
			// hexadecimal
			uint32_t value = scan16(ts + 2, te);
			TemplateParse(parser, tkINTEGER, value);
		};

		digit+ {
			uint32_t value = scan10(ts, te);
			TemplateParse(parser, tkINTEGER, value);
		};

		# identifier ... but also need to check if it's a type.
		# ('*', not '+': a single letter such as 'h' or 'v' is a valid name.)
		[A-Za-z_][A-Za-z0-9_]* {

			// intern the string.

			const std::string *name = InternString(ts, te);

			auto iter = Templates.find(*name);
			if (iter != Templates.end())
			{
				// a name that was defined earlier: a typedef carries its
				// type code, a struct is passed as the template itself.
				unsigned type = iter->second->type;
				if (type) TemplateParse(parser, tkTYPECODE, type);
				else TemplateParse(parser, tkTEMPLATE, iter->second);
			}
			else
			{
				TemplateParse(parser, tkIDENTIFIER, name);
			}
		};

	*|;

}%%

namespace Debug {

std::unordered_map<std::string, Template *> Templates;

// TODO: not implemented yet -- typedefs are parsed but not recorded.
void CreateTypedef(const std::string *name, int type)
{
}

// TODO: not implemented yet -- structs are parsed but not recorded.
void CreateTemplate(const std::string *name, FieldEntry *firstField)
{
}

int TemplateLine;

// Maps `filename` into memory, runs the lexer over it and feeds every token
// to the Lemon parser.  Returns false when the file is missing, empty or
// unreadable, or when the lexer hits a character it cannot tokenize;
// otherwise returns true.
bool ParseTemplates(const std::string &filename)
{
	%% write data;

	struct stat st;

	if (stat(filename.c_str(), &st) < 0) return false;
	if (st.st_size == 0) return false;

	int fd = open(filename.c_str(), O_RDONLY);
	if (fd < 0) {
		perror("Error opening template file: ");
		return false;
	}

	void *mapping = mmap(nullptr, st.st_size, PROT_READ, MAP_FILE | MAP_SHARED, fd, 0);
	if (mapping == MAP_FAILED) {
		perror("Error mapping template file: ");
		close(fd);
		return false;
	}
	// the mapping stays valid after the descriptor is closed.
	close(fd);

	void *parser = TemplateParseAlloc(malloc);

	// shared by the error paths and the normal exit.
	auto cleanup = [&]() {
		TemplateParseFree(parser, free);
		munmap(mapping, st.st_size);
	};

	TemplateLine = 1;

	const char *p = static_cast<const char *>(mapping);
	const char *pe = p + st.st_size;
	const char *eof = pe;
	const char *ts;
	const char *te;
	int cs, act;

	for(;;)
	{

		%% write init;
		%% write exec;

		if (cs == lexer_error)
		{
			// p may already be at the end of the mapping; do not read past it.
			if (p < pe) fprintf(stderr, "illegal character: `%c'\n", *p);
			else fprintf(stderr, "unexpected end of file\n");
			cleanup();
			return false;
		}
		if (cs == lexer_en_error)
		{
			cleanup();
			return false;
		}
		if (p == pe)
		{
			TemplateParse(parser, tkEOF, 0);
			break;
		}
	}

	// token 0 tells Lemon that the input is complete.
	TemplateParse(parser, 0, 0);
	cleanup();

	return true;
}

}
%token_prefix tk
%name TemplateParse

%include {

	#include <cstdio>
	#include <cstdlib>
	#include <string>

	#include "template.h"

	using namespace Debug;
}

// Token values arrive as void *: IDENTIFIER carries an interned
// std::string *, TEMPLATE a Template *, INTEGER and TYPECODE an int *.

// Without this, input that matches no rule is dropped without a word.
%syntax_error {
	fprintf(stderr, "Template error: line %d: syntax error.\n", TemplateLine);
}

%type struct_fields { FieldEntry * }
%type struct_field { FieldEntry * }
%type array_count { int }
%type opt_star { int }
%type typecode { int }
%type type { int }

start ::= templates EOF.

templates ::= templates struct.
templates ::= templates typedef.
templates ::= .

// NOTE(review): unlike struct, this rule does not consume a trailing SEMI,
// so "typedef long Fixed;" leaves the ';' unmatched -- confirm intended.
typedef ::= TYPEDEF type(a) IDENTIFIER(b). {
	CreateTypedef(static_cast<std::string *>(b), a);
}

struct ::= STRUCT IDENTIFIER(a) LBRACE struct_fields(b) RBRACE SEMI. {
	CreateTemplate(static_cast<std::string *>(a), b);
}

struct_fields(rhs) ::= struct_fields(a) struct_field(b). {
	// each new field is linked in front, so the list ends up in
	// reverse declaration order.
	b->next = a;
	rhs = b;
}

struct_fields(rhs) ::= struct_field(a). {
	rhs = a;
}

struct_field(rhs) ::= type(a) IDENTIFIER(b) array_count(c) SEMI. {
	// calloc zero-fills, so tmpl and next start out null.
	FieldEntry *e = static_cast<FieldEntry *>(calloc(1, sizeof(FieldEntry)));

	e->name = static_cast<std::string *>(b);
	e->type = a;
	e->count = c;
	rhs = e;
}

struct_field(rhs) ::= opt_volatile TEMPLATE(a) opt_star(star) IDENTIFIER(b) array_count(c) SEMI. {
	FieldEntry *e = static_cast<FieldEntry *>(calloc(1, sizeof(FieldEntry)));

	e->name = static_cast<std::string *>(b);
	e->type = star ? kPtr : 0;
	e->tmpl = static_cast<Template *>(a);
	e->count = c;

	rhs = e;
}

// -1 marks a field that is not an array.
array_count(rhs) ::= . { rhs = -1; }
array_count(rhs) ::= LBRACKET INTEGER(a) RBRACKET. { rhs = *static_cast<int *>(a); }

// this is an expected error...
type(rhs) ::= opt_volatile IDENTIFIER(xxx). {

	// ugh, Lemon will blindly replace text within a string.
	// (TemplateLine is an int, hence the signed conversion.)
	fprintf(stderr, "Template error: line %d: %s is not a known type.\n",
		TemplateLine, static_cast<std::string *>(xxx)->c_str());

	rhs = 'i';
}

type(rhs) ::= opt_volatile typecode(a). { rhs = a; }

opt_volatile ::= .
opt_volatile ::= VOLATILE.

typecode(rhs) ::= SIGNED. { rhs = 'i'; }
typecode(rhs) ::= UNSIGNED. { rhs = 'I'; }

typecode(rhs) ::= opt_signed CHAR. { rhs = 'c'; }
typecode(rhs) ::= UNSIGNED CHAR. { rhs = 'C'; }

typecode(rhs) ::= opt_signed SHORT. { rhs = 's'; }
typecode(rhs) ::= UNSIGNED SHORT. { rhs = 'S'; }

typecode(rhs) ::= opt_signed LONG opt_int. { rhs = 'l'; }
typecode(rhs) ::= UNSIGNED LONG opt_int. { rhs = 'L'; }

typecode(rhs) ::= opt_signed LONG LONG. { rhs = 'q'; }
typecode(rhs) ::= UNSIGNED LONG LONG. { rhs = 'Q'; }

typecode(rhs) ::= TYPECODE(a). { rhs = *static_cast<int *>(a); }

/* pointers are not fully supported yet */
typecode(rhs) ::= VOID STAR. { rhs = kPtr; }

opt_signed ::= .
opt_signed ::= SIGNED.

opt_int ::= .
opt_int ::= INT.


opt_star(rhs) ::= . { rhs = 0; }
opt_star(rhs) ::= STAR. { rhs = 1; }