From b8506ee7d44c76a0efba24f4ce3063cc73370bd6 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Mon, 8 Jan 2018 03:31:23 +0100 Subject: [PATCH] optimize, tests, refactor --- README.md | 4 +- il65/__main__.py | 3 +- il65/compiler.py | 63 +++++++++--------- il65/handwritten/codegen.py | 4 +- il65/handwritten/exprparse.py | 3 +- il65/handwritten/optimize.py | 3 +- il65/handwritten/parse.py | 13 ++-- il65/handwritten/preprocess.py | 3 +- il65/handwritten/symbols.py | 6 +- il65/lib/c64lib.ill | 5 +- il65/lib/il65lib.ill | 5 +- il65/lib/mathlib.ill | 5 +- il65/main.py | 5 +- il65/optimizer.py | 114 +++++++++++++++++++++++++++------ il65/plylexer.py | 7 +- il65/plyparser.py | 107 ++++++++++++++++++++++++++----- il65/symbols.py | 26 ++++++++ reference.md | 4 +- requirements.txt | 2 + tests/test_compiler.py | 5 ++ tests/test_core.py | 20 ++++++ tests/test_optimizer.py | 6 ++ tests/test_parser.py | 111 ++++++++++++++++++++++++++++++++ testsource/conditionals.ill | 4 +- todo.ill | 9 ++- 25 files changed, 425 insertions(+), 112 deletions(-) create mode 100644 il65/symbols.py create mode 100644 requirements.txt create mode 100644 tests/test_compiler.py create mode 100644 tests/test_core.py create mode 100644 tests/test_optimizer.py create mode 100644 tests/test_parser.py diff --git a/README.md b/README.md index 4db22e28b..4e16c0f1c 100644 --- a/README.md +++ b/README.md @@ -1,9 +1,9 @@ IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors ===================================================================================== -*Written by Irmen de Jong (irmen@razorvine.net)* +*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0* -*Software license: GNU GPL 3.0, see LICENSE* +*Software license: GNU GPL 3.0, see file LICENSE* This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's diff --git a/il65/__main__.py b/il65/__main__.py index 427977c50..4ea58a929 100644 --- a/il65/__main__.py +++ b/il65/__main__.py @@ -1,8 +1,7 @@ """ Programming Language for 6502/6510 microprocessors -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from . import main diff --git a/il65/compiler.py b/il65/compiler.py index dc36122bd..50e84d110 100644 --- a/il65/compiler.py +++ b/il65/compiler.py @@ -1,30 +1,29 @@ """ -Programming Language for 6502/6510 microprocessors +Programming Language for 6502/6510 microprocessors, codename 'Sick' This is the compiler of the IL65 code, that prepares the parse tree for code generation. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import re import os import sys import linecache -from typing import Optional, Tuple, Set, Dict, Any, List +from typing import Optional, Tuple, Set, Dict, Any, no_type_check from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \ - SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters + SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression from .plylexer import SourceRef, print_bold from .optimizer import optimize class ParseError(Exception): def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None: + super().__init__(message) self.sourceref = sourceref - self.msg = message self.sourcetext = sourcetext def __str__(self): - return "{} {:s}".format(self.sourceref, self.msg) + return "{} {:s}".format(self.sourceref, self.args[0]) class PlyParser: @@ -39,6 +38,7 @@ class PlyParser: self.check_directives(module) self.process_imports(module) self.create_multiassigns(module) + self.process_all_expressions(module) if not self.parsing_import: self.determine_subroutine_usage(module) except ParseError as x: @@ -52,49 +52,52 @@ class PlyParser: self.parse_errors += 1 print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args))) + @no_type_check + def process_all_expressions(self, module: Module) -> None: + # process/simplify all expressions (constant folding etc) + for block, parent in module.all_scopes(): + if block.scope: + for node in block.scope.nodes: + if node is None: + print(block, block.scope, block.scope.nodes) + node.process_expressions() + + @no_type_check def create_multiassigns(self, module: Module) -> None: # create multi-assign statements from nested assignments (A=B=C=5), # and optimize TargetRegisters down to single Register if it's just one register. - def simplify_targetregisters(targets: List[Any]) -> List[Any]: - new_targets = [] - for t in targets: - if isinstance(t, TargetRegisters) and len(t.registers) == 1: - t = t.registers[0] - new_targets.append(t) - return new_targets - def reduce_right(assign: Assignment) -> Assignment: if isinstance(assign.right, Assignment): right = reduce_right(assign.right) - targets = simplify_targetregisters(right.left) - assign.left.extend(targets) + assign.left.extend(right.left) assign.right = right.right return assign - for mnode, parent in module.all_scopes(): - if mnode.scope: - for node in mnode.scope.nodes: + for block, parent in module.all_scopes(): + if block.scope: + for node in block.scope.nodes: if isinstance(node, Assignment): - node.left = simplify_targetregisters(node.left) if isinstance(node.right, Assignment): multi = reduce_right(node) assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment) + node.simplify_targetregisters() + @no_type_check def determine_subroutine_usage(self, module: Module) -> None: module.subroutine_usage.clear() - for mnode, parent in module.all_scopes(): - if mnode.scope: - for node in mnode.scope.nodes: + for block, parent in module.all_scopes(): + if block.scope: + for node in block.scope.nodes: if isinstance(node, InlineAssembly): - self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, mnode.scope) + self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, block.scope) elif isinstance(node, SubCall): - self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, mnode.scope) + self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, block.scope) elif isinstance(node, Goto): - self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, mnode.scope) + self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, block.scope) elif isinstance(node, Return): - self._parse_return_for_subroutine_usages(module.subroutine_usage, node, mnode.scope) + self._parse_return_for_subroutine_usages(module.subroutine_usage, node, block.scope) elif isinstance(node, Assignment): - self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, mnode.scope) + self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, block.scope) def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], subcall: SubCall, parent_scope: Scope) -> None: @@ -265,7 +268,7 @@ class PlyParser: if __name__ == "__main__": description = "Compiler for IL65 language, code name 'Sick'" - print("\n" + description) + print("\n" + description + "\n") plyparser = PlyParser() m = plyparser.parse_file(sys.argv[1]) optimize(m) diff --git a/il65/handwritten/codegen.py b/il65/handwritten/codegen.py index ed542775d..d61fedaba 100644 --- a/il65/handwritten/codegen.py +++ b/il65/handwritten/codegen.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors, codename 'Sick' This is the assembly code generator (from the parse tree) -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import io @@ -259,6 +258,7 @@ class CodeGenerator: self.p("\t.pend\n") def generate_block_vars(self, block: Block) -> None: + # @todo block vars should be re-initialized when the program is run again, and not depend on statically prefilled data! consts = [c for c in block.symbols.iter_constants()] if consts: self.p("; constants") diff --git a/il65/handwritten/exprparse.py b/il65/handwritten/exprparse.py index b2e2dc43b..421cbfa8b 100644 --- a/il65/handwritten/exprparse.py +++ b/il65/handwritten/exprparse.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors This is the expression parser/evaluator. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import ast diff --git a/il65/handwritten/optimize.py b/il65/handwritten/optimize.py index 4d4c671ec..0368eab4d 100644 --- a/il65/handwritten/optimize.py +++ b/il65/handwritten/optimize.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors This is the code to optimize the parse tree. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from typing import List diff --git a/il65/handwritten/parse.py b/il65/handwritten/parse.py index 9ced08794..2a5e04067 100644 --- a/il65/handwritten/parse.py +++ b/il65/handwritten/parse.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors This is the hand-written parser of the IL65 code, that generates a parse tree. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import re @@ -73,7 +72,7 @@ class Parser: if sub_usage is not None: # re-use the (global) subroutine usage tracking self.result.subroutine_usage = sub_usage - self.sourceref = SourceRef(filename, -1, 0) + self.sourceref = SourceRef(filename, -1, 0) # type: ignore if sourcelines: self.lines = sourcelines else: @@ -234,7 +233,7 @@ class Parser: def _parse_2(self) -> None: # parsing pass 2 (not done during preprocessing!) self.cur_block = None - self.sourceref = SourceRef(self.sourceref.file, -1) + self.sourceref = SourceRef(self.sourceref.file, -1) # type: ignore def imm_string_to_var(stmt: AssignmentStmt, containing_block: Block) -> None: if stmt.right.name or not isinstance(stmt.right, StringValue): @@ -358,7 +357,7 @@ class Parser: self._cur_lineidx += 1 try: lineno, line = self.lines[self._cur_lineidx] - self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) + self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) # type: ignore return line except IndexError: return "" @@ -366,7 +365,7 @@ class Parser: def prev_line(self) -> str: self._cur_lineidx -= 1 lineno, line = self.lines[self._cur_lineidx] - self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) + self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) # type: ignore return line def peek_next_line(self) -> str: @@ -382,7 +381,7 @@ class Parser: if num == lineno: sourceline = text.strip() break - return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column)) + return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column)) # type: ignore def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: if typestr == ".byte": diff --git a/il65/handwritten/preprocess.py b/il65/handwritten/preprocess.py index e11a85c54..985d76c0b 100644 --- a/il65/handwritten/preprocess.py +++ b/il65/handwritten/preprocess.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors This is the preprocessing parser of the IL65 code, that only generates a symbol table. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from typing import List, Tuple, Set diff --git a/il65/handwritten/symbols.py b/il65/handwritten/symbols.py index 3898a1839..9a158856b 100644 --- a/il65/handwritten/symbols.py +++ b/il65/handwritten/symbols.py @@ -2,8 +2,7 @@ Programming Language for 6502/6510 microprocessors Here are the symbol (name) operations such as lookups, datatype definitions. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import inspect @@ -357,6 +356,9 @@ class SymbolTable: def iter_labels(self) -> Iterable[LabelDef]: yield from sorted((v for v in self.symbols.values() if isinstance(v, LabelDef))) + def remove_node(self, name: str) -> None: + del self.symbols[name] + def check_identifier_valid(self, name: str, sourceref: SourceRef) -> None: if not name.isidentifier(): raise SymbolError("invalid identifier") diff --git a/il65/lib/c64lib.ill b/il65/lib/c64lib.ill index 91eb4c2df..200623d32 100644 --- a/il65/lib/c64lib.ill +++ b/il65/lib/c64lib.ill @@ -1,9 +1,8 @@ ; IL65 definitions for the Commodore-64 ; Including memory registers, I/O registers, Basic and Kernel subroutines, utility subroutines. ; -; Written by Irmen de Jong (irmen@razorvine.net) -; License: GNU GPL 3.0, see LICENSE -; +; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 +; ; ; indent format: TABS, size=8 diff --git a/il65/lib/il65lib.ill b/il65/lib/il65lib.ill index ba6c8f7aa..dce7b038a 100644 --- a/il65/lib/il65lib.ill +++ b/il65/lib/il65lib.ill @@ -1,8 +1,7 @@ ; IL65 internal library routines ; -; Written by Irmen de Jong (irmen@razorvine.net) -; License: GNU GPL 3.0, see LICENSE -; +; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 +; ; ; indent format: TABS, size=8 diff --git a/il65/lib/mathlib.ill b/il65/lib/mathlib.ill index 74f80617b..fffe77659 100644 --- a/il65/lib/mathlib.ill +++ b/il65/lib/mathlib.ill @@ -5,9 +5,8 @@ ; http://6502org.wikidot.com/software-math ; http://codebase64.org/doku.php?id=base:6502_6510_maths ; -; Written by Irmen de Jong (irmen@razorvine.net) -; License: GNU GPL 3.0, see LICENSE -; +; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 +; ; ; indent format: TABS, size=8 diff --git a/il65/main.py b/il65/main.py index 970cf3fbf..e770a24d0 100644 --- a/il65/main.py +++ b/il65/main.py @@ -1,11 +1,8 @@ -#! /usr/bin/env python3 - """ Programming Language for 6502/6510 microprocessors, codename 'Sick' This is the main program that drives the rest. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import time diff --git a/il65/optimizer.py b/il65/optimizer.py index 660dff0b1..552bbd1dc 100644 --- a/il65/optimizer.py +++ b/il65/optimizer.py @@ -1,12 +1,12 @@ """ -Programming Language for 6502/6510 microprocessors -This is the code to optimize the parse tree. +Programming Language for 6502/6510 microprocessors, codename 'Sick' +This is the optimizer that applies various optimizations to the parse tree. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ -from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment +from typing import no_type_check +from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression from .plylexer import print_warning, print_bold @@ -17,31 +17,70 @@ class Optimizer: def optimize(self) -> None: self.num_warnings = 0 - # self.remove_augmentedassign_incrdecr_nops(block) # @todo self.remove_useless_assigns() - # self.combine_assignments_into_multi(block) # @todo + self.combine_assignments_into_multi() self.optimize_multiassigns() self.remove_unused_subroutines() - # self.optimize_compare_with_zero(block) # @todo + self.optimize_compare_with_zero() self.remove_empty_blocks() - def remove_useless_assigns(self) -> None: + def remove_useless_assigns(self): # remove assignment statements that do nothing (A=A) - for mnode, parent in self.module.all_scopes(): - if mnode.scope: - for assignment in list(mnode.scope.nodes): + # and augmented assignments that have no effect (A+=0) + # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false + for block, parent in self.module.all_scopes(): + if block.scope: + for assignment in list(block.scope.nodes): if isinstance(assignment, Assignment): assignment.left = [lv for lv in assignment.left if lv != assignment.right] if not assignment.left: - mnode.scope.remove_node(assignment) + block.scope.remove_node(assignment) self.num_warnings += 1 - print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref)) + print_warning("{}: removed statement that has no effect".format(assignment.sourceref)) + if isinstance(assignment, AugAssignment): + if isinstance(assignment.right, (int, float)): + if assignment.right == 0 and assignment.operator in ("+=", "-=", "|=", "<<=", ">>=", "^="): + self.num_warnings += 1 + print_warning("{}: removed statement that has no effect".format(assignment.sourceref)) + block.scope.remove_node(assignment) + if assignment.right >= 8 and assignment.operator in ("<<=", ">>="): + self.num_warnings += 1 + print_warning("{}: shifting result is always zero".format(assignment.sourceref)) + new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref) + block.scope.replace_node(assignment, new_stmt) - def optimize_multiassigns(self) -> None: + def combine_assignments_into_multi(self): + # fold multiple consecutive assignments with the same rvalue into one multi-assignment + for block, parent in self.module.all_scopes(): + if block.scope: + rvalue = None + assignments = [] + for stmt in list(block.scope.nodes): + if isinstance(stmt, Assignment): + if assignments: + if stmt.right == rvalue: + assignments.append(stmt) + continue + elif len(assignments) > 1: + # replace the first assignment by a multi-assign with all the others + for stmt in assignments[1:]: + print("{}: joined with previous assignment".format(stmt.sourceref)) + assignments[0].left.extend(stmt.left) + block.scope.remove_node(stmt) + rvalue = None + assignments.clear() + else: + rvalue = stmt.right + assignments.append(stmt) + else: + rvalue = None + assignments.clear() + + def optimize_multiassigns(self): # optimize multi-assign statements (remove duplicate targets, optimize order) - for mnode, parent in self.module.all_scopes(): - if mnode.scope: - for assignment in mnode.scope.nodes: + for block, parent in self.module.all_scopes(): + if block.scope: + for assignment in block.scope.nodes: if isinstance(assignment, Assignment) and len(assignment.left) > 1: # remove duplicates lvalues = set(assignment.left) @@ -51,7 +90,7 @@ class Optimizer: # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) assignment.left = list(lvalues) - def remove_unused_subroutines(self) -> None: + def remove_unused_subroutines(self): # some symbols are used by the emitted assembly code from the code generator, # and should never be removed or the assembler will fail never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB", @@ -66,6 +105,39 @@ class Optimizer: num_discarded += 1 print("discarded {:d} unused subroutines".format(num_discarded)) + def optimize_compare_with_zero(self): + # a conditional goto that compares a value with zero will be simplified + # the comparison operator and rvalue (0) will be removed and the if-status changed accordingly + for block, parent in self.module.all_scopes(): + if block.scope: + for stmt in block.scope.filter_nodes(Goto): + if isinstance(stmt.condition, Expression): + raise NotImplementedError("optimize goto conditionals", stmt.condition) # @todo + # if cond and isinstance(cond.rvalue, (int, float)) and cond.rvalue.value == 0: + # simplified = False + # if cond.ifstatus in ("true", "ne"): + # if cond.comparison_op == "==": + # # if_true something == 0 -> if_not something + # cond.ifstatus = "not" + # cond.comparison_op, cond.rvalue = "", None + # simplified = True + # elif cond.comparison_op == "!=": + # # if_true something != 0 -> if_true something + # cond.comparison_op, cond.rvalue = "", None + # simplified = True + # elif cond.ifstatus in ("not", "eq"): + # if cond.comparison_op == "==": + # # if_not something == 0 -> if_true something + # cond.ifstatus = "true" + # cond.comparison_op, cond.rvalue = "", None + # simplified = True + # elif cond.comparison_op == "!=": + # # if_not something != 0 -> if_not something + # cond.comparison_op, cond.rvalue = "", None + # simplified = True + # if simplified: + # print("{}: simplified comparison with zero".format(stmt.sourceref)) + def remove_empty_blocks(self) -> None: # remove blocks without name and without address, or that are empty for node, parent in self.module.all_scopes(): @@ -94,6 +166,6 @@ def optimize(mod: Module) -> None: opt.optimize() if opt.num_warnings: if opt.num_warnings == 1: - print_bold("there is one optimization warning.") + print_bold("\nThere is one optimization warning.\n") else: - print_bold("there are {:d} optimization warnings.".format(opt.num_warnings)) + print_bold("\nThere are {:d} optimization warnings.\n".format(opt.num_warnings)) diff --git a/il65/plylexer.py b/il65/plylexer.py index 283fab27d..eb0e47ea8 100644 --- a/il65/plylexer.py +++ b/il65/plylexer.py @@ -1,9 +1,8 @@ """ -Programming Language for 6502/6510 microprocessors +Programming Language for 6502/6510 microprocessors, codename 'Sick' This is the lexer of the IL65 code, that generates a stream of tokens for the parser. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import sys @@ -338,5 +337,3 @@ lexer = ply.lex.lex() if __name__ == "__main__": ply.lex.runmain() - # lexer = ply.lex.Lexer() - # ply.lex.runmain(lexer=lexer) diff --git a/il65/plyparser.py b/il65/plyparser.py index 06d5a4293..5b4775263 100644 --- a/il65/plyparser.py +++ b/il65/plyparser.py @@ -1,16 +1,16 @@ """ -Programming Language for 6502/6510 microprocessors +Programming Language for 6502/6510 microprocessors, codename 'Sick' This is the parser of the IL65 code, that generates a parse tree. -Written by Irmen de Jong (irmen@razorvine.net) -License: GNU GPL 3.0, see LICENSE +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from collections import defaultdict +from typing import Union, Generator, Tuple, List import attr from ply.yacc import yacc -from typing import Union, Generator, Tuple, List from .plylexer import SourceRef, tokens, lexer, find_tok_column +from .symbols import DataType start = "start" @@ -47,6 +47,11 @@ class AstNode: tostr(elt, level + 2) tostr(self, 0) + def process_expressions(self) -> None: + # process/simplify all expressions (constant folding etc) @todo + # override in node types that have expression(s) + pass + @attr.s(cmp=False, repr=False) class Directive(AstNode): @@ -66,11 +71,12 @@ class Scope(AstNode): # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"] self.symbols = {} for node in self.nodes: + assert isinstance(node, AstNode) if isinstance(node, (Label, VarDef)): self.symbols[node.name] = node if isinstance(node, Subroutine): self.symbols[node.name] = node - if node.scope is not None: + if node.scope: node.scope.parent_scope = self if isinstance(node, Block): if node.name: @@ -89,7 +95,7 @@ class Scope(AstNode): if not isinstance(scope, Scope): raise LookupError("undefined symbol: " + name) scope = scope.symbols.get(namepart, None) - if scope is None: + if not scope: raise LookupError("undefined symbol: " + name) return scope else: @@ -110,6 +116,13 @@ class Scope(AstNode): del self.symbols[node.name] self.nodes.remove(node) + def replace_node(self, oldnode: AstNode, newnode: AstNode) -> None: + assert isinstance(newnode, AstNode) + idx = self.nodes.index(oldnode) + self.nodes[idx] = newnode + if hasattr(oldnode, "name"): + del self.symbols[oldnode.name] + @attr.s(cmp=False, repr=False) class Module(AstNode): @@ -171,6 +184,18 @@ class Assignment(AstNode): left = attr.ib(type=list) # type: List[Union[str, TargetRegisters, Dereference]] right = attr.ib() + def __attrs_post_init__(self): + self.simplify_targetregisters() + + def simplify_targetregisters(self) -> None: + # optimize TargetRegisters down to single Register if it's just one register + new_targets = [] + for t in self.left: + if isinstance(t, TargetRegisters) and len(t.registers) == 1: + t = t.registers[0] + new_targets.append(t) + self.left = new_targets + @attr.s(cmp=False, repr=False) class AugAssignment(AstNode): @@ -215,12 +240,41 @@ class VarDef(AstNode): vartype = attr.ib() datatype = attr.ib() value = attr.ib(default=None) + size = attr.ib(type=int, default=None) + + def __attrs_post_init__(self): + # convert datatype node to enum + size + if self.datatype is None: + assert self.size is None + self.size = 1 + self.datatype = DataType.BYTE + elif isinstance(self.datatype, DatatypeNode): + assert self.size is None + self.size = self.datatype.dimensions + self.datatype = self.datatype.to_enum() + # if the value is an expression, mark it as a *constant* expression here + if isinstance(self.value, Expression): + self.value.processed_must_be_constant = True @attr.s(cmp=False, slots=True, repr=False) -class Datatype(AstNode): +class DatatypeNode(AstNode): name = attr.ib(type=str) - dimension = attr.ib(type=list, default=None) + dimensions = attr.ib(type=list, default=None) # if set, 1 or more dimensions (ints) + + def to_enum(self): + return { + "byte": DataType.BYTE, + "word": DataType.WORD, + "float": DataType.FLOAT, + "text": DataType.STRING, + "ptext": DataType.STRING_P, + "stext": DataType.STRING_S, + "pstext": DataType.STRING_PS, + "matrix": DataType.MATRIX, + "array": DataType.BYTEARRAY, + "wordarray": DataType.WORDARRAY + }[self.name] @attr.s(cmp=False, repr=False) @@ -232,9 +286,9 @@ class Subroutine(AstNode): address = attr.ib(type=int, default=None) def __attrs_post_init__(self): - if self.scope is not None and self.address is not None: + if self.scope and self.address is not None: raise ValueError("subroutine must have either a scope or an address, not both") - if self.scope is not None: + if self.scope: self.scope.name = self.name @@ -249,6 +303,18 @@ class Goto(AstNode): class Dereference(AstNode): location = attr.ib() datatype = attr.ib() + size = attr.ib(type=int, default=None) + + def __attrs_post_init__(self): + # convert datatype node to enum + size + if self.datatype is None: + assert self.size is None + self.size = 1 + self.datatype = DataType.BYTE + elif isinstance(self.datatype, DatatypeNode): + assert self.size is None + self.size = self.datatype.dimensions + self.datatype = self.datatype.to_enum() @attr.s(cmp=False, slots=True, repr=False) @@ -274,6 +340,9 @@ class Expression(AstNode): left = attr.ib() operator = attr.ib(type=str) right = attr.ib() + processed_must_be_constant = attr.ib(type=bool, init=False, default=False) # does the expression have to be a constant value? + processed = attr.ib(type=bool, init=False, default=False) # has this expression been processed/simplified yet? + constant = attr.ib(type=bool, init=False, default=False) # is the processed expression a constant value? def p_start(p): @@ -297,9 +366,15 @@ def p_module(p): | module_elements module_elt """ if len(p) == 2: - p[0] = [p[1]] + if p[1] is None: + p[0] = [] + else: + p[0] = [p[1]] else: - p[0] = p[1] + [p[2]] + if p[2] is None: + p[0] = p[1] + else: + p[0] = p[1] + [p[2]] def p_module_elt(p): @@ -377,7 +452,7 @@ def p_scope(p): """ scope : '{' scope_elements_opt '}' """ - p[0] = Scope(nodes=p[2], sourceref=_token_sref(p, 1)) + p[0] = Scope(nodes=p[2] or [], sourceref=_token_sref(p, 1)) def p_scope_elements_opt(p): @@ -453,9 +528,9 @@ def p_type_opt(p): | empty """ if len(p) == 5: - p[0] = Datatype(name=p[1], dimension=p[3], sourceref=_token_sref(p, 1)) - elif len(p) == 2: - p[0] = Datatype(name=p[1], sourceref=_token_sref(p, 1)) + p[0] = DatatypeNode(name=p[1], dimensions=p[3], sourceref=_token_sref(p, 1)) + elif len(p) == 2 and p[1]: + p[0] = DatatypeNode(name=p[1], sourceref=_token_sref(p, 1)) def p_dimensions(p): diff --git a/il65/symbols.py b/il65/symbols.py new file mode 100644 index 000000000..f1c85c8a6 --- /dev/null +++ b/il65/symbols.py @@ -0,0 +1,26 @@ +""" +Programming Language for 6502/6510 microprocessors, codename 'Sick' +Here are the symbol (name) operations such as lookups, datatype definitions. + +Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 +""" + + +import enum + + +class DataType(enum.Enum): + """The possible data types of values""" + BYTE = 1 + WORD = 2 + FLOAT = 3 + BYTEARRAY = 4 + WORDARRAY = 5 + MATRIX = 6 + STRING = 7 + STRING_P = 8 + STRING_S = 9 + STRING_PS = 10 + + +STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS} diff --git a/reference.md b/reference.md index ec0982860..a7ee32150 100644 --- a/reference.md +++ b/reference.md @@ -1,9 +1,9 @@ IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors ===================================================================================== -*Written by Irmen de Jong (irmen@razorvine.net)* +*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0* -*Software license: GNU GPL 3.0, see LICENSE* +*Software license: GNU GPL 3.0, see file LICENSE* This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 000000000..a8497ebc9 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,2 @@ +attrs +ply diff --git a/tests/test_compiler.py b/tests/test_compiler.py new file mode 100644 index 000000000..b3676330d --- /dev/null +++ b/tests/test_compiler.py @@ -0,0 +1,5 @@ +from il65.compiler import PlyParser + + +def test_compiler(): + pass # @todo diff --git a/tests/test_core.py b/tests/test_core.py new file mode 100644 index 000000000..9db3136c5 --- /dev/null +++ b/tests/test_core.py @@ -0,0 +1,20 @@ +from il65.symbols import DataType, STRING_DATATYPES +from il65.compiler import ParseError +from il65.plylexer import SourceRef + + +def test_datatypes(): + assert all(isinstance(s, DataType) for s in STRING_DATATYPES) + + +def test_sourceref(): + s = SourceRef("file", 99, 42) + assert str(s) == "file:99:42" + s = SourceRef("file", 99) + assert str(s) == "file:99" + + +def test_parseerror(): + p = ParseError("message", "source code", SourceRef("filename", 99, 42)) + assert p.args == ("message", ) + assert str(p) == "filename:99:42 message" diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py new file mode 100644 index 000000000..e40175b10 --- /dev/null +++ b/tests/test_optimizer.py @@ -0,0 +1,6 @@ +from il65.optimizer import Optimizer + + +def test_optimizer(): + pass # @todo + diff --git a/tests/test_parser.py b/tests/test_parser.py new file mode 100644 index 000000000..d1307179b --- /dev/null +++ b/tests/test_parser.py @@ -0,0 +1,111 @@ +from il65.plylexer import lexer, tokens, find_tok_column, literals, reserved +from il65.plyparser import parser, TokenFilter, Module, Subroutine, Block, Return + + +def test_lexer_definitions(): + assert "ENDL" in tokens + assert "GOTO" in tokens + assert '+' in literals + assert ';' not in literals + assert "return" in reserved + assert "sub" in reserved + assert "A" in reserved + assert "if_cc" in reserved + + +test_source = """ %output prg, sys + +; c1 + +; c2 + + +~ block $c000 { + %import a,b + + + ; comment + + var .matrix(20,30) m = 9.234556 + ;comment2 + + + sub calculate () -> () { + return + } + + ;z + +} +""" + +def test_lexer(): + lexer.input(test_source) + lexer.lineno = 1 + tokens = list(iter(lexer)) + token_types = list(t.type for t in tokens) + assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', 'ENDL', + 'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL', + 'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', + 'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'ENDL', + 'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', 'ENDL', 'ENDL', 'ENDL', + '}', 'ENDL'] + directive_token = tokens[12] + assert directive_token.type == "DIRECTIVE" + assert directive_token.value == "import" + assert directive_token.lineno == 9 + assert directive_token.lexpos == lexer.lexdata.index("%import") + assert find_tok_column(directive_token) == 10 + + +def test_tokenfilter(): + lexer.input(test_source) + lexer.lineno = 1 + filter = TokenFilter(lexer) + tokens = [] + while True: + token = filter.token() + if not token: + break + tokens.append(token) + token_types = list(t.type for t in tokens) + assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', + 'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL', + 'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', + 'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', + 'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', + '}', 'ENDL'] + + +def test_parser(): + lexer.lineno = 1 + lexer.source_filename = "sourcefile" + filter = TokenFilter(lexer) + result = parser.parse(input=test_source, tokenfunc=filter.token) + assert isinstance(result, Module) + assert result.name == "sourcefile" + assert result.scope.name == "" + assert result.subroutine_usage == {} + assert result.scope.parent_scope is None + sub = result.scope["block.calculate"] + assert isinstance(sub, Subroutine) + assert sub.name == "calculate" + block = result.scope["block"] + assert isinstance(block, Block) + assert block.name == "block" + assert block.address == 49152 + sub2 = block.scope["calculate"] + assert sub2 is sub + assert sub2.lineref == "src l. 18" + all_scopes = list(result.all_scopes()) + assert len(all_scopes) == 3 + assert isinstance(all_scopes[0][0], Module) + assert all_scopes[0][1] is None + assert isinstance(all_scopes[1][0], Block) + assert isinstance(all_scopes[1][1], Module) + assert isinstance(all_scopes[2][0], Subroutine) + assert isinstance(all_scopes[2][1], Block) + stmt = list(all_scopes[2][0].scope.filter_nodes(Return)) + assert len(stmt) == 1 + assert isinstance(stmt[0], Return) + assert stmt[0].lineref == "src l. 19" diff --git a/testsource/conditionals.ill b/testsource/conditionals.ill index d91013e65..6aa1b3991 100644 --- a/testsource/conditionals.ill +++ b/testsource/conditionals.ill @@ -80,9 +80,7 @@ label4: -; @todo temporarily disabled until comparison operators are properly implemented: - -~ { +~ conditionals { var bytevar = 22 + 23 var .text name = "?"*80 var bytevar2 = 23 diff --git a/todo.ill b/todo.ill index 4f8a5a60e..2b1f930fd 100644 --- a/todo.ill +++ b/todo.ill @@ -9,11 +9,12 @@ const num = 2 var var1 =2 - var .word wvar1 = 2 + var .word wvar1 = 2 + foo() ; @todo constant start: + wvar1 = 2+foo() A=math.randbyte() A += c64.RASTER @@ -148,6 +149,12 @@ loop : ;return A = $11 + A = $11 + A = $11 + X = $11 + Y = $11 + X = $11 + Y = $11 X = $22 Y = $33