From cb554ed40f838624c8aad01aac25de4407b7018a Mon Sep 17 00:00:00 2001 From: Wolfgang Thaller Date: Sun, 5 Oct 2014 23:52:34 +0200 Subject: [PATCH] Rez: successful parse. --- CMakeLists.txt | 1 + Rez/CMakeLists.txt | 49 +++++++ Rez/Rez.cc | 67 ++++++++++ Rez/RezLexer.cc | 116 +++++++++++++++++ Rez/RezLexer.h | 28 ++++ Rez/RezLexerNextToken.cc | 192 +++++++++++++++++++++++++++ Rez/RezLexerWaveToken.h | 15 +++ Rez/RezParser.yy | 271 +++++++++++++++++++++++++++++++++++++++ Rez/Test.r | 11 ++ 9 files changed, 750 insertions(+) create mode 100644 Rez/CMakeLists.txt create mode 100644 Rez/Rez.cc create mode 100644 Rez/RezLexer.cc create mode 100644 Rez/RezLexer.h create mode 100644 Rez/RezLexerNextToken.cc create mode 100644 Rez/RezLexerWaveToken.h create mode 100644 Rez/RezParser.yy create mode 100644 Rez/Test.r diff --git a/CMakeLists.txt b/CMakeLists.txt index 67f1b96c41..f46cdec399 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -69,4 +69,5 @@ add_subdirectory(Launcher) else() add_subdirectory(MakeAPPL) add_subdirectory(ASFilter) +add_subdirectory(Rez) endif() diff --git a/Rez/CMakeLists.txt b/Rez/CMakeLists.txt new file mode 100644 index 0000000000..ae7c33bf55 --- /dev/null +++ b/Rez/CMakeLists.txt @@ -0,0 +1,49 @@ +# Copyright 2012 Wolfgang Thaller. +# +# This file is part of Retro68. +# +# Retro68 is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# Retro68 is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with Retro68. If not, see <http://www.gnu.org/licenses/>.
+ +cmake_minimum_required(VERSION 2.8) +# Build with C++11; note that this assignment replaces any CMAKE_CXX_FLAGS value set earlier. +set(CMAKE_CXX_FLAGS "--std=c++11") + +find_package(Boost COMPONENTS wave filesystem system thread regex) + + +find_package(BISON REQUIRED) + +include_directories(. ${CMAKE_CURRENT_BINARY_DIR}) +# Run Bison on RezParser.yy to produce RezParser.generated.cc/.hh in the build directory. +add_custom_command( + DEPENDS RezParser.yy + COMMAND ${BISON_EXECUTABLE} + ARGS -o ${CMAKE_CURRENT_BINARY_DIR}/RezParser.generated.cc + ${CMAKE_CURRENT_SOURCE_DIR}/RezParser.yy --graph + COMMENT "Generating parser.cpp" + OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/RezParser.generated.cc ${CMAKE_CURRENT_BINARY_DIR}/RezParser.generated.hh +) +add_executable(Rez + Rez.cc + + RezParser.yy RezParser.generated.hh RezParser.generated.cc + + RezLexer.h + RezLexer.cc + RezLexerWaveToken.h + RezLexerNextToken.cc + ) +target_link_libraries(Rez ${Boost_LIBRARIES}) + +install(TARGETS Rez RUNTIME DESTINATION bin) diff --git a/Rez/Rez.cc b/Rez/Rez.cc new file mode 100644 index 0000000000..91560796d7 --- /dev/null +++ b/Rez/Rez.cc @@ -0,0 +1,67 @@ +#include +#include +/* NOTE(review): the two #include lines above have lost their header names in this copy of the patch - restore them from the repository. */ + +#include "RezParser.generated.hh" +#include "RezLexer.h" + +/* Entry point: builds a RezLexer over a hard-coded Test.r path and runs RezParser on it; the commented-out blocks below are leftover preprocessing and token-dump experiments. */ +int main() +{ + //RezLexer lexer("/home/wolfgang/Projects/Retro68/RIncludes/Types.r"); + RezLexer lexer("/home/wolfgang/Projects/Retro68/Rez/Test.r"); + + RezParser parser(lexer); + + parser.parse(); + + /* + // The following preprocesses a given input file. + // Open the file and read it into a string variable + std::ifstream instream("/home/wolfgang/Projects/Retro68/RIncludes/Types.r"); + + std::string input( + std::istreambuf_iterator(instream.rdbuf()), + std::istreambuf_iterator()); + + context_type ctx(input.begin(), input.end(), "Types.r"); + + // At this point you may want to set the parameters of the + // preprocessing as include paths and/or predefined macros. 
+ ctx.add_include_path("/home/wolfgang/Projects/Retro68/RIncludes"); + // ctx.add_macro_definition(...); + + auto first = ctx.begin(); + auto last = ctx.end(); + + std::ostringstream out; + + try + { + while(first != last) + { + out << (*first).get_value(); + ++first; + } + } + catch(boost::wave::preprocess_exception& e) + { + std::cout << e.file_name() << ":" << e.line_no() << ": "; + std::cout << e.description() << std::endl; + } + + std::string str = out.str(); + + + std::cout << str.substr(0,100) << std::endl;*/ + +/* + int i = 0; + while (first != last) { + std::cout << i << ": " << get_token_name(token_id(*first)) << " <<" << (*first).get_value() << ">>\n"; + ++first; + if(++i > 10) + break; + }*/ + return 0; +} diff --git a/Rez/RezLexer.cc b/Rez/RezLexer.cc new file mode 100644 index 0000000000..739664b524 --- /dev/null +++ b/Rez/RezLexer.cc @@ -0,0 +1,116 @@ +#include "RezLexer.h" +/* NOTE(review): the four system header names below were missing from this copy of the patch and are reconstructed from the Boost.Wave and Boost.Regex names this file uses - confirm against the repository. */ +#include <boost/wave.hpp> +#include <boost/wave/cpplexer/cpp_lex_token.hpp> +#include <boost/wave/cpplexer/cpp_lex_iterator.hpp> +#include <boost/regex.hpp> + +#include "RezLexerWaveToken.h" + +namespace wave = boost::wave; + +using namespace boost::wave; +/* Input policy for the Wave context: loads a file into a string, cuts off whatever follows "#endif" on the same line, applies the backslash-dollar escape substitution, and points the iteration context's iterators at the filtered text. */ +struct load_file_to_string_filtered +{ + template <typename IterContextT> + class inner + { + public: + template <typename PositionT> + static void init_iterators(IterContextT &iter_ctx, + PositionT const &act_pos, language_support language) + { + typedef typename IterContextT::iterator_type iterator_type; + + // read in the file + std::ifstream instream(iter_ctx.filename.c_str()); + if (!instream.is_open()) { + BOOST_WAVE_THROW_CTX(iter_ctx.ctx, preprocess_exception, + bad_include_file, iter_ctx.filename.c_str(), act_pos); + return; + } + instream.unsetf(std::ios::skipws); + + std::string str(std::istreambuf_iterator<char>(instream.rdbuf()), + std::istreambuf_iterator<char>()); + + boost::regex endif("#endif[^\r\n]*"); + str = boost::regex_replace(str, endif, "#endif"); + + boost::regex dollar_escape("\\\\\\$([a-zA-Z0-9][a-zA-Z0-9])"); + str = boost::regex_replace(str, dollar_escape, "\\x$1"); + + + iter_ctx.instring = str; + + iter_ctx.first = iterator_type( + iter_ctx.instring.begin(), 
iter_ctx.instring.end(), + PositionT(iter_ctx.filename), language); + iter_ctx.last = iterator_type(); + } + + private: + std::string instring; + }; +}; + + + +typedef wave::cpplexer::lex_iterator< + wave::cpplexer::lex_token<> > + lex_iterator_type; +typedef wave::context< + std::string::iterator, lex_iterator_type, + load_file_to_string_filtered> + context_type; +typedef context_type::iterator_type pp_iterator_type; +/* Private lexer state: the input text, the Wave context built over it, and the current read position. `input` is declared before `ctx` because `ctx` is constructed from iterators into `input`. */ +struct RezLexer::Priv +{ + std::string input; + context_type ctx; + pp_iterator_type iter; + + Priv(std::string data, std::string name) + : input(data), ctx(input.begin(), input.end(), name.c_str()) + { + } +}; +/* Reads the whole file into memory, configures the include path and the DeRez macro, and positions the iterator on the first preprocessed token. */ +RezLexer::RezLexer(std::string filename) +{ + std::ifstream instream(filename); + + pImpl.reset(new Priv(std::string( + std::istreambuf_iterator<char>(instream.rdbuf()), + std::istreambuf_iterator<char>()), + filename)); + + pImpl->ctx.add_include_path("/home/wolfgang/Projects/Retro68/RIncludes"); + // ctx.add_macro_definition(...); + pImpl->ctx.add_macro_definition("DeRez=0"); /* the macro text uses NAME=value form; a separate "0" argument would bind to the bool is_predefined parameter */ + + pImpl->iter = pImpl->ctx.begin(); +} + +RezLexer::~RezLexer() +{ + +} + +bool RezLexer::atEnd() +{ + return pImpl->iter == pImpl->ctx.end(); +} +/* Returns the current token and advances; yields a default-constructed WaveToken once the end is reached. */ +RezLexer::WaveToken RezLexer::nextWave() +{ + return pImpl->iter == pImpl->ctx.end() ? WaveToken() : (*pImpl->iter++); +} + +RezLexer::WaveToken RezLexer::peekWave() +{ + return pImpl->iter == pImpl->ctx.end() ? 
WaveToken() : *pImpl->iter; +} + diff --git a/Rez/RezLexer.h b/Rez/RezLexer.h new file mode 100644 index 0000000000..c0e7f12cea --- /dev/null +++ b/Rez/RezLexer.h @@ -0,0 +1,28 @@ +#ifndef REZLEXER_H +#define REZLEXER_H + +#include <memory> + +class RezSymbol; +/* Tokenizer for Rez source: keeps its preprocessing state behind the Priv pointer and hands parser symbols out through nextToken(). */ +class RezLexer +{ + struct Priv; + std::unique_ptr<Priv> pImpl; + + std::string curFile; + + class WaveToken; + + bool atEnd(); + WaveToken nextWave(); + WaveToken peekWave(); + +public: + RezLexer(std::string filename); + ~RezLexer(); + + RezSymbol nextToken(); +}; + +#endif // REZLEXER_H diff --git a/Rez/RezLexerNextToken.cc b/Rez/RezLexerNextToken.cc new file mode 100644 index 0000000000..f862edbf02 --- /dev/null +++ b/Rez/RezLexerNextToken.cc @@ -0,0 +1,192 @@ +#include "RezLexer.h" +#include "RezLexerWaveToken.h" +#include "RezParser.generated.hh" +#include <unordered_map> + +#include <boost/regex.hpp> + +using namespace boost::wave; +/* Converts the text of an integer literal to int. A leading "0" selects octal, "0x"/"0X" hexadecimal, "0b"/"0B" or a bare "b"/"B" binary; anything else is decimal. Letters count as digit values 10 and up in either case; digits are not checked against the base and overflow is not detected. */ +static int readInt(const char *str) +{ + int x = 0; + + int base = 10; + + if(*str == '0') + { + base = 8; + ++str; + if(*str == 'x' || *str == 'X') + { + base = 16; + ++str; + } + else if(*str == 'b' || *str == 'B') /* else-if: after "0x" a leading B is a hex digit, not a binary prefix */ + { + base = 2; + ++str; + } + } + else if(*str == 'b' || *str == 'B') + { + base = 2; + ++str; + } + + while(*str) + { + x *= base; + if(*str >= 'a' && *str <= 'z') + x += *str - 'a' + 10; + else if(*str >= 'A' && *str <= 'Z') + x += *str - 'A' + 10; + else if(*str >= '0' && *str <= '9') + x += *str - '0'; + ++str; + } + + return x; +} +/* Returns the next parser symbol: skips whitespace, end-of-line tokens and whole T_PP_LINE directives, then converts the first remaining token; returns a default RezSymbol when the input is exhausted. */ +RezSymbol RezLexer::nextToken() +{ + for(auto tok = nextWave(); tok != T_EOI && tok != T_EOF; tok = nextWave()) + { + if(IS_CATEGORY(tok, WhiteSpaceTokenType)) + continue; + else if(IS_CATEGORY(tok, EOLTokenType)) + continue; + else if(tok == T_PP_LINE) + { + while(tok != T_EOI && tok != T_EOF && !IS_CATEGORY(tok, EOLTokenType)) + tok = nextWave(); + continue; + } + else + { + //std::cout << "{" << std::hex << (token_id)tok << std::dec << "|" << tok.get_value() << "}\n"; + + auto pos = tok.get_position(); + curFile = pos.get_file().c_str(); + auto yypos = yy::position(&curFile, 
pos.get_line(), pos.get_column()); + yy::location loc(yypos); + /* NOTE(review): placeholder - a token classified as unknown '"' is reported as the fixed string literal "Hello, world." */ + if(tok == (UnknownTokenType | '"')) + { + return RezParser::make_STRINGLIT("Hello, world.", loc); + } + else if(IS_CATEGORY(tok, IdentifierTokenType) || IS_CATEGORY(tok, KeywordTokenType) || IS_CATEGORY(tok, BoolLiteralTokenType)) + { + typedef decltype(&RezParser::make_TYPE) memfun; +#define KEYWORD(upper, lower) \ +{ lower, &RezParser::make_ ## upper } + /* Lower-cased keyword spelling -> factory for the matching parser token; lookups below are case-insensitive. */ + static std::unordered_map<std::string, memfun> keywords = { + KEYWORD(TYPE, "type"), + KEYWORD(RESOURCE, "resource"), + + KEYWORD(ARRAY,"array"), + KEYWORD(SWITCH, "switch"), + KEYWORD(CASE, "case"), + KEYWORD(AS, "as"), + KEYWORD(FILL,"fill"), + KEYWORD(ALIGN, "align"), + KEYWORD(HEX,"hex"), + KEYWORD(KEY, "key"), + KEYWORD(WIDE,"wide"), + KEYWORD(UNSIGNED, "unsigned"), + KEYWORD(LITERAL, "literal"), + KEYWORD(BOOLEAN, "boolean"), + KEYWORD(BIT, "bit"), + KEYWORD(BYTE, "byte"), + KEYWORD(CHAR, "char"), + KEYWORD(WORD, "word"), + KEYWORD(INTEGER, "integer"), + KEYWORD(LONG, "long"), + KEYWORD(LONGINT, "longint"), + KEYWORD(PSTRING, "pstring"), + KEYWORD(WSTRING, "wstring"), /* was PSTRING, which left the grammar's separate "wstring" token unreachable */ + KEYWORD(STRING, "string"), + KEYWORD(POINT, "point"), + KEYWORD(RECT, "rect"), + KEYWORD(BITSTRING, "bitstring"), + + KEYWORD(INTEGER, "int"), + + }; + + std::string s = tok.get_value().c_str(); + std::string lower = s; + std::transform(lower.begin(), lower.end(), lower.begin(), ::tolower); + auto p = keywords.find(lower); + if(p == keywords.end()) + { + //std::cout << "id: " << s << std::endl; + return RezParser::make_IDENTIFIER(lower, loc); + } + else + { + //std::cout << "key: " << s << std::endl; + return (*p->second)(loc); + } + } + else if(tok == T_INTLIT) + { + if(tok.get_value() == "0") + { + auto tok2 = peekWave(); + while(tok2 != T_EOI && tok2 != T_EOF && IS_CATEGORY(tok2, WhiteSpaceTokenType)) + nextWave(), tok2 = peekWave(); + + //std::cout << "!" 
<< std::hex << (token_id)tok2 << std::dec << "|" << tok2.get_value() << "!\n"; + static boost::regex binlit("[bB][01]+"); + if(tok2 == T_IDENTIFIER && boost::regex_match(tok2.get_value().c_str(), binlit)) + tok = nextWave(); + } + return RezParser::make_INTLIT(readInt(tok.get_value().c_str()), loc); + } + else + { +#define NOVAL_TOK(name) \ +case T_ ## name: /*std::cout << #name << std::endl;*/ return RezParser::make_ ## name(loc) + switch(token_id(tok)) + { + case T_INTLIT: return RezParser::make_INTLIT(readInt(tok.get_value().c_str()), loc); /* not reached: T_INTLIT is already handled by the branch above */ + + case T_CHARLIT: return RezParser::make_CHARLIT(tok.get_value().c_str(), loc); + case T_STRINGLIT: return RezParser::make_STRINGLIT(tok.get_value().c_str(), loc); + + NOVAL_TOK(LEFTBRACE); + NOVAL_TOK(RIGHTBRACE); + NOVAL_TOK(LEFTBRACKET); + NOVAL_TOK(RIGHTBRACKET); + NOVAL_TOK(LEFTPAREN); + NOVAL_TOK(RIGHTPAREN); + NOVAL_TOK(SEMICOLON); + NOVAL_TOK(COMMA); + NOVAL_TOK(PLUS); + NOVAL_TOK(MINUS); + NOVAL_TOK(DIVIDE); + NOVAL_TOK(STAR); + NOVAL_TOK(ASSIGN); + NOVAL_TOK(COLON); + NOVAL_TOK(SHIFTLEFT); + NOVAL_TOK(SHIFTRIGHT); + NOVAL_TOK(EQUAL); + NOVAL_TOK(NOTEQUAL); + NOVAL_TOK(AND); + NOVAL_TOK(OR); + NOVAL_TOK(XOR); + NOVAL_TOK(COMPL); + + default: + + return RezParser::make_BADTOKEN(tok.get_value().c_str(), loc); + } + + } + } + } + return RezSymbol(); +} diff --git a/Rez/RezLexerWaveToken.h b/Rez/RezLexerWaveToken.h new file mode 100644 index 0000000000..3d11903b0b --- /dev/null +++ b/Rez/RezLexerWaveToken.h @@ -0,0 +1,15 @@ +#ifndef REZLEXERWAVETOKEN_H +#define REZLEXERWAVETOKEN_H + +#include "RezLexer.h" + +#include <boost/wave/cpplexer/cpp_lex_token.hpp> +/* Definition of the token type that RezLexer only forward-declares: a thin wrapper over Wave's lex_token. */ +class RezLexer::WaveToken : public boost::wave::cpplexer::lex_token<> +{ +public: + WaveToken() = default; + WaveToken(const boost::wave::cpplexer::lex_token<> & o) : boost::wave::cpplexer::lex_token<>(o) {} +}; + +#endif // REZLEXERWAVETOKEN_H diff --git a/Rez/RezParser.yy b/Rez/RezParser.yy new file mode 100644 index 0000000000..ed5dacad8e --- /dev/null +++ b/Rez/RezParser.yy @@ -0,0 +1,271 @@ 
+%require "3.0.2" +%defines +%define parser_class_name {RezParser} +%skeleton "lalr1.cc" + +%locations; + +%define api.token.constructor +%define api.value.type variant +%define parse.assert +/* Tokens that carry a semantic value; the types match the arguments the lexer passes to the corresponding make_* factories. */ +%token <std::string> IDENTIFIER; +%token <std::string> CHARLIT; +%token <std::string> STRINGLIT; +%token <int> INTLIT; + +%token <std::string> BADTOKEN; + + +%token LEFTBRACE "{"; +%token RIGHTBRACE "}"; +%token LEFTBRACKET "["; +%token RIGHTBRACKET "]"; +%token LEFTPAREN "("; +%token RIGHTPAREN ")"; +%token SEMICOLON ";"; +%token COMMA ","; +%token PLUS "+"; +%token MINUS "-"; +%token DIVIDE "/"; +%token STAR "*"; +%token ASSIGN "="; +%token COLON ":"; +%token SHIFTLEFT "<<"; +%token SHIFTRIGHT ">>"; +%token EQUAL "=="; +%token NOTEQUAL "!="; +%token AND "&"; +%token OR "|"; +%token XOR "^"; +%token COMPL "~"; + + +%token TYPE "type"; +%token RESOURCE "resource"; +%token ARRAY "array"; +%token SWITCH "switch"; +%token CASE "case"; +%token AS "as"; +%token FILL "fill"; +%token ALIGN "align"; +%token HEX "hex"; +%token KEY "key"; +%token WIDE "wide"; +%token LITERAL "literal"; +%token UNSIGNED "unsigned"; + +%token BOOLEAN "boolean"; +%token BIT "bit"; +%token BYTE "byte"; +%token CHAR "char"; +%token WORD "word"; +%token INTEGER "integer"; +%token LONG "long"; +%token LONGINT "longint"; +%token PSTRING "pstring"; +%token WSTRING "wstring"; +%token STRING "string"; +%token POINT "point"; +%token RECT "rect"; +%token BITSTRING "bitstring"; + +/* +%left "|"; +%left "^"; +%left "&"; +%left "==" "!="; +%left ">>" "<<"; +%left "+" "-"; +%left "*" "/"; +*/ + +%param { RezLexer& lexer } + +%code requires { + #define YY_NULLPTR nullptr + class RezLexer; +} + +%code provides { + using yy::RezParser; + //using RezSymbol = yy::RezParser::symbol_type; + + class RezSymbol : public yy::RezParser::symbol_type + { + public: + RezSymbol() = default; + RezSymbol(const yy::RezParser::symbol_type& x) : yy::RezParser::symbol_type(x) {} + }; +} + +%code { + #include "RezLexer.h" + static yy::RezParser::symbol_type yylex(RezLexer& lexer) + { + return 
lexer.nextToken(); + } + + void yy::RezParser::error(const location_type& loc, std::string const& err) + { + std::cerr << loc << ": " << err << std::endl; + } +} + +%% +%start rez; +/* A Rez file is a sequence of type definitions and resource definitions, each terminated by ";". */ +rez : %empty + | rez type_definition ";" + | rez resource ";" + ; + +simpletype : "boolean" + | "bit" | "byte" | "word" | "integer" | "long" | "longint" | "rect" + | "point" + | "char" + | "pstring" array_count_opt + | "wstring" array_count_opt + | "string" array_count_opt; + | "bitstring" "[" expression "]"; + +type_definition : "type" type_spec "{" field_definitions "}" + { std::cout << "TYPE " << $2 << std::endl; } + | "type" type_spec "as" type_spec + { std::cout << "TYPE " << $2 << std::endl; } + ; + +%type <std::string> type_spec; +type_spec : CHARLIT { $$ = $1; } + | CHARLIT "(" INTLIT ")" { $$ = $1; } + ; + +field_definitions : %empty + | field_definitions IDENTIFIER ":" + | field_definitions ";" + | field_definitions field_definition ";" ; + +field_definition: simple_field_definition + | array_definition + | switch_definition + | fill_statement + | align_statement; + +fill_statement : "fill" fill_unit array_count_opt; +align_statement : "align" fill_unit; + +fill_unit : "bit" | "byte" | "word" | "long"; + +simple_field_definition: field_attributes simpletype value_spec; + +value_spec : %empty + | named_values + | "=" expression; + +named_values: named_value + | named_values "," named_value + | named_values named_value; + +named_value : IDENTIFIER + | IDENTIFIER "=" expression ; + +array_definition: array_attributes "array" array_name_opt array_count_opt "{" field_definitions "}" ; + +array_count : "[" expression "]" ; +array_count_opt : %empty | array_count ; + +array_name_opt : %empty | IDENTIFIER ; + +array_attributes: %empty | "wide" ; +field_attributes: %empty | field_attributes field_attribute; +field_attribute : "hex" | "key" | "unsigned" | "literal"; + +switch_definition: "switch" "{" + switch_cases + "}" ; + +switch_cases : %empty | switch_cases switch_case ; + +switch_case : 
"case" IDENTIFIER ":" field_definitions ; + +/* +expression + | expression "^" expression + | expression "&" expression + | expression "|" expression + | "~" expression + | expression "==" expression + | expression "!=" expression + | expression ">>" expression + | expression "<<" expression + | expression "+" expression + | expression "-" expression + | "-" expression + | expression "/" expression + | expression "*" expression + ; +*/ +/* Precedence is encoded by rule layering, loosest first. NOTE(review): "^" binds loosest here, then "&", then "|", whereas the commented-out %left list orders them "|", "^", "&" - confirm which order is intended. */ +expression : expression1 + | expression "^" expression1 + ; + +expression1 : expression2 + | expression1 "&" expression2 + ; + +expression2 : expression3 + | expression2 "|" expression3 + ; + +expression3 : expression4 + | expression3 "==" expression4 + | expression3 "!=" expression4 + ; + +expression4 : expression5 + | expression4 ">>" expression5 + | expression4 "<<" expression5 + ; + +expression5 : expression6 + | expression5 "+" expression6 + | expression5 "-" expression6 + ; + +expression6 : expression7 + | expression6 "*" expression7 + | expression6 "/" expression7 + ; +expression7 : expression8 + | "-" expression7 + | "+" expression7 + | "~" expression7 + ; + +expression8 : INTLIT + | CHARLIT + | STRINGLIT + | IDENTIFIER + | IDENTIFIER "(" function_argument_list ")" + | IDENTIFIER "[" function_argument_list1 "]" + | "(" expression ")" + | "{" resource_body "}" + ; +/* NOTE(review): the second alternative of function_argument_list1 recurses through function_argument_list, which may be empty, so a leading comma such as "(, x)" is accepted - confirm whether function_argument_list1 was meant. */ +function_argument_list : %empty | function_argument_list1 ; +function_argument_list1 : expression | function_argument_list "," expression ; + +resource: "resource" CHARLIT "(" function_argument_list ")" "{" resource_body "}" + { std::cout << "RESOURCE " << $2 << std::endl; } + +resource_body : %empty | resource_body1 ; +resource_body1 : resource_item + | resource_body1 "," resource_item + | resource_body1 ";" resource_item + | resource_body1 ";" + ; + +resource_item : expression | IDENTIFIER "{" resource_body "}" ; + +%% diff --git a/Rez/Test.r b/Rez/Test.r new file mode 100644 index 0000000000..173917529b --- /dev/null +++ b/Rez/Test.r @@ -0,0 +1,11 @@ 
+/*#include "Types.r" + + +*/ + +#include "/home/wolfgang/Projects/Retro68/CExamples/Sample.r" + +type 'TEST' { +boolean itemUnlocked = false, // defined attributes bits... + itemLocked = true; +};