optimize, tests, refactor

2024-11-18 19:12:44 +00:00 · 2018-01-08 03:31:23 +01:00 · 2018-01-08 03:31:23 +01:00 · b8506ee7d4
commit b8506ee7d4
parent 7d8c2bf161
25 changed files with 425 additions and 112 deletions
--- a/README.md
+++ b/README.md
@ -1,9 +1,9 @@
 IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
 =====================================================================================

-*Written by Irmen de Jong (irmen@razorvine.net)*
+*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*

-*Software license: GNU GPL 3.0, see LICENSE*
+*Software license: GNU GPL 3.0, see file LICENSE*


 This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's
--- a/il65/main.py
+++ b/il65/main.py
@ -1,8 +1,7 @@
 """
 Programming Language for 6502/6510 microprocessors

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 from . import main
--- a/il65/compiler.py
+++ b/il65/compiler.py
@ -1,30 +1,29 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the compiler of the IL65 code, that prepares the parse tree for code generation.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import re
 import os
 import sys
 import linecache
-from typing import Optional, Tuple, Set, Dict, Any, List
+from typing import Optional, Tuple, Set, Dict, Any, no_type_check
 from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
-    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters
+    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression
 from .plylexer import SourceRef, print_bold
 from .optimizer import optimize


 class ParseError(Exception):
    def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None:
+        super().__init__(message)
        self.sourceref = sourceref
-        self.msg = message
        self.sourcetext = sourcetext

    def __str__(self):
-        return "{} {:s}".format(self.sourceref, self.msg)
+        return "{} {:s}".format(self.sourceref, self.args[0])


 class PlyParser:
@ -39,6 +38,7 @@ class PlyParser:
            self.check_directives(module)
            self.process_imports(module)
            self.create_multiassigns(module)
+            self.process_all_expressions(module)
            if not self.parsing_import:
                self.determine_subroutine_usage(module)
        except ParseError as x:
@ -52,49 +52,52 @@ class PlyParser:
        self.parse_errors += 1
        print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))

+    @no_type_check
+    def process_all_expressions(self, module: Module) -> None:
+        """
+        Process/simplify all expressions in the module (constant folding etc).
+
+        Visits the nodes of every scope returned by module.all_scopes() and calls
+        process_expressions() on each of them; the actual work is done by the
+        node types that override that method.
+        """
+        for block, _parent in module.all_scopes():
+            if not block.scope:
+                continue
+            # A None entry in a scope is an internal error; it surfaces as an
+            # AttributeError on the call below instead of a debug dump on stdout.
+            for node in block.scope.nodes:
+                node.process_expressions()
+
+    @no_type_check
    def create_multiassigns(self, module: Module) -> None:
        # create multi-assign statements from nested assignments (A=B=C=5),
        # and optimize TargetRegisters down to single Register if it's just one register.
-        def simplify_targetregisters(targets: List[Any]) -> List[Any]:
-            new_targets = []
-            for t in targets:
-                if isinstance(t, TargetRegisters) and len(t.registers) == 1:
-                    t = t.registers[0]
-                new_targets.append(t)
-            return new_targets
-
        def reduce_right(assign: Assignment) -> Assignment:
            if isinstance(assign.right, Assignment):
                right = reduce_right(assign.right)
-                targets = simplify_targetregisters(right.left)
-                assign.left.extend(targets)
+                assign.left.extend(right.left)
                assign.right = right.right
            return assign

-        for mnode, parent in module.all_scopes():
-            if mnode.scope:
-                for node in mnode.scope.nodes:
+        for block, parent in module.all_scopes():
+            if block.scope:
+                for node in block.scope.nodes:
                    if isinstance(node, Assignment):
-                        node.left = simplify_targetregisters(node.left)
                        if isinstance(node.right, Assignment):
                            multi = reduce_right(node)
                            assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment)
+                        node.simplify_targetregisters()

+    @no_type_check
    def determine_subroutine_usage(self, module: Module) -> None:
        module.subroutine_usage.clear()
-        for mnode, parent in module.all_scopes():
-            if mnode.scope:
-                for node in mnode.scope.nodes:
+        for block, parent in module.all_scopes():
+            if block.scope:
+                for node in block.scope.nodes:
                    if isinstance(node, InlineAssembly):
-                        self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, mnode.scope)
+                        self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, block.scope)
                    elif isinstance(node, SubCall):
-                        self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                    elif isinstance(node, Goto):
-                        self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                    elif isinstance(node, Return):
-                        self._parse_return_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_return_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                    elif isinstance(node, Assignment):
-                        self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, block.scope)

    def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                             subcall: SubCall, parent_scope: Scope) -> None:
@ -265,7 +268,7 @@ class PlyParser:

 if __name__ == "__main__":
    description = "Compiler for IL65 language, code name 'Sick'"
-    print("\n" + description)
+    print("\n" + description + "\n")
    plyparser = PlyParser()
    m = plyparser.parse_file(sys.argv[1])
    optimize(m)
--- a/il65/handwritten/codegen.py
+++ b/il65/handwritten/codegen.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the assembly code generator (from the parse tree)

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import io
@ -259,6 +258,7 @@ class CodeGenerator:
            self.p("\t.pend\n")

    def generate_block_vars(self, block: Block) -> None:
+        # @todo block vars should be re-initialized when the program is run again, and not depend on statically prefilled data!
        consts = [c for c in block.symbols.iter_constants()]
        if consts:
            self.p("; constants")
--- a/il65/handwritten/exprparse.py
+++ b/il65/handwritten/exprparse.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the expression parser/evaluator.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import ast
--- a/il65/handwritten/optimize.py
+++ b/il65/handwritten/optimize.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the code to optimize the parse tree.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 from typing import List
--- a/il65/handwritten/parse.py
+++ b/il65/handwritten/parse.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the hand-written parser of the IL65 code, that generates a parse tree.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import re
@ -73,7 +72,7 @@ class Parser:
        if sub_usage is not None:
            # re-use the (global) subroutine usage tracking
            self.result.subroutine_usage = sub_usage
-        self.sourceref = SourceRef(filename, -1, 0)
+        self.sourceref = SourceRef(filename, -1, 0)   # type: ignore
        if sourcelines:
            self.lines = sourcelines
        else:
@ -234,7 +233,7 @@ class Parser:
    def _parse_2(self) -> None:
        # parsing pass 2 (not done during preprocessing!)
        self.cur_block = None
-        self.sourceref = SourceRef(self.sourceref.file, -1)
+        self.sourceref = SourceRef(self.sourceref.file, -1)   # type: ignore

        def imm_string_to_var(stmt: AssignmentStmt, containing_block: Block) -> None:
            if stmt.right.name or not isinstance(stmt.right, StringValue):
@ -358,7 +357,7 @@ class Parser:
        self._cur_lineidx += 1
        try:
            lineno, line = self.lines[self._cur_lineidx]
-            self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)
+            self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)    # type: ignore
            return line
        except IndexError:
            return ""
@ -366,7 +365,7 @@ class Parser:
    def prev_line(self) -> str:
        self._cur_lineidx -= 1
        lineno, line = self.lines[self._cur_lineidx]
-        self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)
+        self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)    # type: ignore
        return line

    def peek_next_line(self) -> str:
@ -382,7 +381,7 @@ class Parser:
            if num == lineno:
                sourceline = text.strip()
                break
-        return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column))
+        return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column))    # type: ignore

    def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]:
        if typestr == ".byte":
--- a/il65/handwritten/preprocess.py
+++ b/il65/handwritten/preprocess.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the preprocessing parser of the IL65 code, that only generates a symbol table.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 from typing import List, Tuple, Set
--- a/il65/handwritten/symbols.py
+++ b/il65/handwritten/symbols.py
@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 Here are the symbol (name) operations such as lookups, datatype definitions.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import inspect
@ -357,6 +356,9 @@ class SymbolTable:
    def iter_labels(self) -> Iterable[LabelDef]:
        yield from sorted((v for v in self.symbols.values() if isinstance(v, LabelDef)))

+    def remove_node(self, name: str) -> None:
+        """Delete the entry stored under the given name from self.symbols."""
+        del self.symbols[name]
+
    def check_identifier_valid(self, name: str, sourceref: SourceRef) -> None:
        if not name.isidentifier():
            raise SymbolError("invalid identifier")
--- a/il65/lib/c64lib.ill
+++ b/il65/lib/c64lib.ill
@ -1,9 +1,8 @@
 ; IL65 definitions for the Commodore-64
 ; Including memory registers, I/O registers, Basic and Kernel subroutines, utility subroutines.
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8


--- a/il65/lib/il65lib.ill
+++ b/il65/lib/il65lib.ill
@ -1,8 +1,7 @@
 ; IL65 internal library routines
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8


--- a/il65/lib/mathlib.ill
+++ b/il65/lib/mathlib.ill
@ -5,9 +5,8 @@
 ;	http://6502org.wikidot.com/software-math
 ;	http://codebase64.org/doku.php?id=base:6502_6510_maths
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8


--- a/il65/main.py
+++ b/il65/main.py
@ -1,11 +1,8 @@
-#! /usr/bin/env python3
-
 """
 Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the main program that drives the rest.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import time
--- a/il65/optimizer.py
+++ b/il65/optimizer.py
@ -1,12 +1,12 @@
 """
-Programming Language for 6502/6510 microprocessors
-This is the code to optimize the parse tree.
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+This is the optimizer that applies various optimizations to the parse tree.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

-from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment
+from typing import no_type_check
+from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression
 from .plylexer import print_warning, print_bold


@ -17,31 +17,70 @@ class Optimizer:

    def optimize(self) -> None:
        self.num_warnings = 0
-        # self.remove_augmentedassign_incrdecr_nops(block)   # @todo
        self.remove_useless_assigns()
-        # self.combine_assignments_into_multi(block)   # @todo
+        self.combine_assignments_into_multi()
        self.optimize_multiassigns()
        self.remove_unused_subroutines()
-        # self.optimize_compare_with_zero(block)  # @todo
+        self.optimize_compare_with_zero()
        self.remove_empty_blocks()

-    def remove_useless_assigns(self) -> None:
+    def remove_useless_assigns(self):
        # remove assignment statements that do nothing (A=A)
-        for mnode, parent in self.module.all_scopes():
-            if mnode.scope:
-                for assignment in list(mnode.scope.nodes):
+        # and augmented assignments that have no effect (A+=0)
+        # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for assignment in list(block.scope.nodes):
                    if isinstance(assignment, Assignment):
                        assignment.left = [lv for lv in assignment.left if lv != assignment.right]
                        if not assignment.left:
-                            mnode.scope.remove_node(assignment)
+                            block.scope.remove_node(assignment)
                            self.num_warnings += 1
-                            print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref))
+                            print_warning("{}: removed statement that has no effect".format(assignment.sourceref))
+                    if isinstance(assignment, AugAssignment):
+                        if isinstance(assignment.right, (int, float)):
+                            if assignment.right == 0 and assignment.operator in ("+=", "-=", "|=", "<<=", ">>=", "^="):
+                                self.num_warnings += 1
+                                print_warning("{}: removed statement that has no effect".format(assignment.sourceref))
+                                block.scope.remove_node(assignment)
+                            if assignment.right >= 8 and assignment.operator in ("<<=", ">>="):
+                                self.num_warnings += 1
+                                print_warning("{}: shifting result is always zero".format(assignment.sourceref))
+                                new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref)
+                                block.scope.replace_node(assignment, new_stmt)

-    def optimize_multiassigns(self) -> None:
+    def combine_assignments_into_multi(self):
+        """
+        Fold consecutive assignments with the same rvalue into one multi-assignment.
+
+        Only assignments that directly follow each other are combined: any other
+        statement, or an assignment with a different rvalue, ends the current run.
+        The first assignment of a run receives the targets of all the others,
+        which are then removed from the scope.
+        """
+        def merge_run(scope, run):
+            # replace the first assignment by a multi-assign with all the others
+            first = run[0]
+            for other in run[1:]:
+                print("{}: joined with previous assignment".format(other.sourceref))
+                first.left.extend(other.left)
+                scope.remove_node(other)
+            run.clear()
+
+        for block, _parent in self.module.all_scopes():
+            if not block.scope:
+                continue
+            run = []    # consecutive assignments that all have the same rvalue
+            # iterate over a copy because merging removes nodes from the scope
+            for stmt in list(block.scope.nodes):
+                is_assignment = isinstance(stmt, Assignment)
+                if is_assignment and run and stmt.right == run[0].right:
+                    run.append(stmt)
+                    continue
+                # this statement ends the current run (if there is one)
+                if run:
+                    merge_run(block.scope, run)
+                if is_assignment:
+                    # an assignment with another rvalue starts a new run of its own
+                    run.append(stmt)
+            if run:
+                # don't forget the run that lasts until the end of the scope
+                merge_run(block.scope, run)
+
+    def optimize_multiassigns(self):
        # optimize multi-assign statements (remove duplicate targets, optimize order)
-        for mnode, parent in self.module.all_scopes():
-            if mnode.scope:
-                for assignment in mnode.scope.nodes:
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for assignment in block.scope.nodes:
                    if isinstance(assignment, Assignment) and len(assignment.left) > 1:
                        # remove duplicates
                        lvalues = set(assignment.left)
@ -51,7 +90,7 @@ class Optimizer:
                        # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)
                        assignment.left = list(lvalues)

-    def remove_unused_subroutines(self) -> None:
+    def remove_unused_subroutines(self):
        # some symbols are used by the emitted assembly code from the code generator,
        # and should never be removed or the assembler will fail
        never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB",
@ -66,6 +105,39 @@ class Optimizer:
                    num_discarded += 1
        print("discarded {:d} unused subroutines".format(num_discarded))

+    def optimize_compare_with_zero(self):
+        """
+        Simplify conditional gotos that compare a value with zero (not implemented yet).
+
+        Looks at every Goto node in every scope of the module. As soon as one is
+        found whose condition is an Expression, NotImplementedError is raised;
+        gotos with any other kind of condition are left alone.
+        """
+        # NOTE(review): optimize() calls this pass unconditionally, so the raise below
+        # aborts the whole optimization run for such a goto -- confirm this is intended.
+        # a conditional goto that compares a value with zero will be simplified
+        # the comparison operator and rvalue (0) will be removed and the if-status changed accordingly
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for stmt in block.scope.filter_nodes(Goto):
+                    if isinstance(stmt.condition, Expression):
+                        raise NotImplementedError("optimize goto conditionals", stmt.condition)   # @todo
+                        # The commented-out code below is a sketch of the intended simplification.
+                        # if cond and isinstance(cond.rvalue, (int, float)) and cond.rvalue.value == 0:
+                        #     simplified = False
+                        #     if cond.ifstatus in ("true", "ne"):
+                        #         if cond.comparison_op == "==":
+                        #             # if_true something == 0   ->  if_not something
+                        #             cond.ifstatus = "not"
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #         elif cond.comparison_op == "!=":
+                        #             # if_true something != 0  -> if_true something
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #     elif cond.ifstatus in ("not", "eq"):
+                        #         if cond.comparison_op == "==":
+                        #             # if_not something == 0   ->  if_true something
+                        #             cond.ifstatus = "true"
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #         elif cond.comparison_op == "!=":
+                        #             # if_not something != 0  -> if_not something
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #     if simplified:
+                        #         print("{}: simplified comparison with zero".format(stmt.sourceref))
+
    def remove_empty_blocks(self) -> None:
        # remove blocks without name and without address, or that are empty
        for node, parent in self.module.all_scopes():
@ -94,6 +166,6 @@ def optimize(mod: Module) -> None:
    opt.optimize()
    if opt.num_warnings:
        if opt.num_warnings == 1:
-            print_bold("there is one optimization warning.")
+            print_bold("\nThere is one optimization warning.\n")
        else:
-            print_bold("there are {:d} optimization warnings.".format(opt.num_warnings))
+            print_bold("\nThere are {:d} optimization warnings.\n".format(opt.num_warnings))
--- a/il65/plylexer.py
+++ b/il65/plylexer.py
@ -1,9 +1,8 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the lexer of the IL65 code, that generates a stream of tokens for the parser.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 import sys
@ -338,5 +337,3 @@ lexer = ply.lex.lex()

 if __name__ == "__main__":
    ply.lex.runmain()
-    # lexer = ply.lex.Lexer()
-    # ply.lex.runmain(lexer=lexer)
--- a/il65/plyparser.py
+++ b/il65/plyparser.py
@ -1,16 +1,16 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the parser of the IL65 code, that generates a parse tree.

-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

 from collections import defaultdict
+from typing import Union, Generator, Tuple, List
 import attr
 from ply.yacc import yacc
-from typing import Union, Generator, Tuple, List
 from .plylexer import SourceRef, tokens, lexer, find_tok_column
+from .symbols import DataType


 start = "start"
@ -47,6 +47,11 @@ class AstNode:
                                tostr(elt, level + 2)
        tostr(self, 0)

+    def process_expressions(self) -> None:
+        # process/simplify all expressions (constant folding etc)   @todo
+        # override in node types that have expression(s)
+        pass
+

@attr.s(cmp=False, repr=False)
 class Directive(AstNode):
@ -66,11 +71,12 @@ class Scope(AstNode):
        # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"]
        self.symbols = {}
        for node in self.nodes:
+            assert isinstance(node, AstNode)
            if isinstance(node, (Label, VarDef)):
                self.symbols[node.name] = node
            if isinstance(node, Subroutine):
                self.symbols[node.name] = node
-                if node.scope is not None:
+                if node.scope:
                    node.scope.parent_scope = self
            if isinstance(node, Block):
                if node.name:
@ -89,7 +95,7 @@ class Scope(AstNode):
                if not isinstance(scope, Scope):
                    raise LookupError("undefined symbol: " + name)
                scope = scope.symbols.get(namepart, None)
-                if scope is None:
+                if not scope:
                    raise LookupError("undefined symbol: " + name)
            return scope
        else:
@ -110,6 +116,13 @@ class Scope(AstNode):
            del self.symbols[node.name]
        self.nodes.remove(node)

+    def replace_node(self, oldnode: AstNode, newnode: AstNode) -> None:
+        """
+        Put newnode in the place of oldnode in this scope's list of nodes.
+
+        If oldnode has a name, its entry (if any) is dropped from the symbol table.
+        Raises ValueError if oldnode is not one of this scope's nodes.
+        """
+        assert isinstance(newnode, AstNode)
+        idx = self.nodes.index(oldnode)
+        self.nodes[idx] = newnode
+        if hasattr(oldnode, "name"):
+            # having a name attribute doesn't guarantee the node was registered as
+            # a symbol, so a missing entry is not an error here
+            self.symbols.pop(oldnode.name, None)
+        # NOTE(review): newnode is not added to the symbol table -- confirm that
+        # a replacement node never needs to be found by name.
+

@attr.s(cmp=False, repr=False)
 class Module(AstNode):
@ -171,6 +184,18 @@ class Assignment(AstNode):
    left = attr.ib(type=list)     # type: List[Union[str, TargetRegisters, Dereference]]
    right = attr.ib()

+    def __attrs_post_init__(self):
+        """Simplify the assignment targets right after the node has been created."""
+        self.simplify_targetregisters()
+
+    def simplify_targetregisters(self) -> None:
+        """Replace every TargetRegisters target that holds just one register by that single register."""
+        self.left = [
+            target.registers[0]
+            if isinstance(target, TargetRegisters) and len(target.registers) == 1
+            else target
+            for target in self.left
+        ]
+

@attr.s(cmp=False, repr=False)
 class AugAssignment(AstNode):
@ -215,12 +240,41 @@ class VarDef(AstNode):
    vartype = attr.ib()
    datatype = attr.ib()
    value = attr.ib(default=None)
+    size = attr.ib(type=int, default=None)
+
+    def __attrs_post_init__(self):
+        """
+        Normalize the datatype of the variable into a DataType enum value plus a size.
+
+        Without a datatype the variable becomes a single BYTE (size 1). A DatatypeNode
+        is replaced by its enum value, and the size is taken from the node's dimensions.
+        A value that is an Expression is flagged as having to be constant.
+        """
+        # convert datatype node to enum + size
+        if self.datatype is None:
+            assert self.size is None
+            self.size = 1
+            self.datatype = DataType.BYTE
+        elif isinstance(self.datatype, DatatypeNode):
+            assert self.size is None
+            # NOTE(review): dimensions is declared as a list that defaults to None, while size
+            # is declared as int -- confirm what the users of size expect here.
+            self.size = self.datatype.dimensions
+            self.datatype = self.datatype.to_enum()
+        # if the value is an expression, mark it as a *constant* expression here
+        if isinstance(self.value, Expression):
+            self.value.processed_must_be_constant = True


@attr.s(cmp=False, slots=True, repr=False)
-class Datatype(AstNode):
+class DatatypeNode(AstNode):
    name = attr.ib(type=str)
-    dimension = attr.ib(type=list, default=None)
+    dimensions = attr.ib(type=list, default=None)    # if set, 1 or more dimensions (ints)
+
+    def to_enum(self):
+        """Return the DataType enum member that belongs to this node's type name (KeyError if the name is unknown)."""
+        datatypes_by_name = dict(
+            byte=DataType.BYTE,
+            word=DataType.WORD,
+            float=DataType.FLOAT,
+            text=DataType.STRING,
+            ptext=DataType.STRING_P,
+            stext=DataType.STRING_S,
+            pstext=DataType.STRING_PS,
+            matrix=DataType.MATRIX,
+            array=DataType.BYTEARRAY,
+            wordarray=DataType.WORDARRAY,
+        )
+        return datatypes_by_name[self.name]


@attr.s(cmp=False, repr=False)
@ -232,9 +286,9 @@ class Subroutine(AstNode):
    address = attr.ib(type=int, default=None)

    def __attrs_post_init__(self):
-        if self.scope is not None and self.address is not None:
+        if self.scope and self.address is not None:
            raise ValueError("subroutine must have either a scope or an address, not both")
-        if self.scope is not None:
+        if self.scope:
            self.scope.name = self.name


@ -249,6 +303,18 @@ class Goto(AstNode):
 class Dereference(AstNode):
    location = attr.ib()
    datatype = attr.ib()
+    size = attr.ib(type=int, default=None)
+
+    def __attrs_post_init__(self):
+        """
+        Normalize the datatype of the dereference into a DataType enum value plus a size.
+
+        Without a datatype a single BYTE (size 1) is assumed. A DatatypeNode is replaced
+        by its enum value, and the size is taken from the node's dimensions.
+        """
+        # convert datatype node to enum + size
+        if self.datatype is None:
+            assert self.size is None
+            self.size = 1
+            self.datatype = DataType.BYTE
+        elif isinstance(self.datatype, DatatypeNode):
+            assert self.size is None
+            # NOTE(review): dimensions is declared as a list that defaults to None, while size
+            # is declared as int -- confirm what the users of size expect here.
+            self.size = self.datatype.dimensions
+            self.datatype = self.datatype.to_enum()


@attr.s(cmp=False, slots=True, repr=False)
@ -274,6 +340,9 @@ class Expression(AstNode):
    left = attr.ib()
    operator = attr.ib(type=str)
    right = attr.ib()
+    processed_must_be_constant = attr.ib(type=bool, init=False, default=False)     # does the expression have to be a constant value?
+    processed = attr.ib(type=bool, init=False, default=False)    # has this expression been processed/simplified yet?
+    constant = attr.ib(type=bool, init=False, default=False)     # is the processed expression a constant value?


 def p_start(p):
@ -297,9 +366,15 @@ def p_module(p):
                    |  module_elements  module_elt
    """
    if len(p) == 2:
-        p[0] = [p[1]]
+        if p[1] is None:
+            p[0] = []
+        else:
+            p[0] = [p[1]]
    else:
-        p[0] = p[1] + [p[2]]
+        if p[2] is None:
+            p[0] = p[1]
+        else:
+            p[0] = p[1] + [p[2]]


 def p_module_elt(p):
@ -377,7 +452,7 @@ def p_scope(p):
    """
    scope :  '{'  scope_elements_opt  '}'
    """
-    p[0] = Scope(nodes=p[2], sourceref=_token_sref(p, 1))
+    p[0] = Scope(nodes=p[2] or [], sourceref=_token_sref(p, 1))


 def p_scope_elements_opt(p):
@ -453,9 +528,9 @@ def p_type_opt(p):
             |  empty
    """
    if len(p) == 5:
-        p[0] = Datatype(name=p[1], dimension=p[3], sourceref=_token_sref(p, 1))
-    elif len(p) == 2:
-        p[0] = Datatype(name=p[1], sourceref=_token_sref(p, 1))
+        p[0] = DatatypeNode(name=p[1], dimensions=p[3], sourceref=_token_sref(p, 1))
+    elif len(p) == 2 and p[1]:
+        p[0] = DatatypeNode(name=p[1], sourceref=_token_sref(p, 1))


 def p_dimensions(p):
--- a/il65/symbols.py
+++ b/il65/symbols.py
@ -0,0 +1,26 @@
+"""
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+Here are the symbol (name) operations such as lookups, datatype definitions.
+
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+"""
+
+
+import enum
+
+
+class DataType(enum.Enum):
+    """
+    The possible data types of values.
+
+    The members are plain enumeration tags; the integer values only serve to
+    tell them apart.
+    """
+    # numeric types
+    BYTE = 1
+    WORD = 2
+    FLOAT = 3
+    # array/matrix types
+    BYTEARRAY = 4
+    WORDARRAY = 5
+    MATRIX = 6
+    # string types (these four together make up STRING_DATATYPES)
+    STRING = 7
+    STRING_P = 8
+    STRING_S = 9
+    STRING_PS = 10
+
+
+STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS}
--- a/reference.md
+++ b/reference.md
@ -1,9 +1,9 @@
 IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
 =====================================================================================

-*Written by Irmen de Jong (irmen@razorvine.net)*
+*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*

-*Software license: GNU GPL 3.0, see LICENSE*
+*Software license: GNU GPL 3.0, see file LICENSE*


 This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's
--- a/requirements.txt
+++ b/requirements.txt
@ -0,0 +1,2 @@
+attrs
+ply
--- a/tests/test_compiler.py
+++ b/tests/test_compiler.py
@ -0,0 +1,5 @@
+from il65.compiler import PlyParser
+
+
+def test_compiler():
+    pass  # @todo: no assertions yet; for now this module only checks that `from il65.compiler import PlyParser` works
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -0,0 +1,20 @@
+from il65.symbols import DataType, STRING_DATATYPES
+from il65.compiler import ParseError
+from il65.plylexer import SourceRef
+
+
+def test_datatypes():
+    assert not [dt for dt in STRING_DATATYPES if not isinstance(dt, DataType)]  # no member may be a non-DataType
+
+
+def test_sourceref():
+    three_part = SourceRef("file", 99, 42)
+    assert str(three_part) == "file:99:42"      # all three constructor arguments show up
+    two_part = SourceRef("file", 99)
+    assert str(two_part) == "file:99"           # third argument omitted: no trailing part
+
+
+def test_parseerror():
+    error = ParseError("message", "source code", SourceRef("filename", 99, 42))
+    assert error.args == ("message",)                  # only the message is kept in args
+    assert str(error) == "filename:99:42 message"      # source reference, one space, then the message
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@ -0,0 +1,6 @@
+from il65.optimizer import Optimizer
+
+
+def test_optimizer():
+    pass  # @todo: no assertions yet; for now this module only checks that `from il65.optimizer import Optimizer` works
+
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@ -0,0 +1,111 @@
+from il65.plylexer import lexer, tokens, find_tok_column, literals, reserved
+from il65.plyparser import parser, TokenFilter, Module, Subroutine, Block, Return
+
+
+def test_lexer_definitions():
+    # token type names that must be declared
+    for token_name in ("ENDL", "GOTO"):
+        assert token_name in tokens
+    assert '+' in literals
+    assert ';' not in literals
+    # names that must be listed as reserved
+    for word in ("return", "sub", "A", "if_cc"):
+        assert word in reserved
+
+
+test_source = """ %output prg, sys
+
+; c1
+
+; c2
+
+
+~ block $c000 {
+         %import a,b
+
+
+    ; comment
+
+    var .matrix(20,30) m = 9.234556
+    ;comment2
+
+
+    sub calculate () -> () {
+        return 
+    }
+    
+    ;z
+    
+}
+"""  # NOTE: the tests below assert line numbers and a column of this text; keep the layout unchanged
+
+def test_lexer():
+    lexer.input(test_source)
+    lexer.lineno = 1
+    lexed = list(lexer)
+    kinds = [tok.type for tok in lexed]
+    assert kinds == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', 'ENDL',
+                     'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
+                     'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL',
+                     'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'ENDL',
+                     'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', 'ENDL', 'ENDL', 'ENDL',
+                     '}', 'ENDL']
+    import_directive = lexed[12]
+    assert import_directive.type == "DIRECTIVE"
+    assert import_directive.value == "import"
+    assert import_directive.lineno == 9
+    assert import_directive.lexpos == lexer.lexdata.index("%import")
+    assert find_tok_column(import_directive) == 10
+
+
+def test_tokenfilter():
+    lexer.input(test_source)
+    lexer.lineno = 1
+    token_filter = TokenFilter(lexer)
+    filtered = []
+    # keep pulling tokens until the filter returns a falsy value
+    token = token_filter.token()
+    while token:
+        filtered.append(token)
+        token = token_filter.token()
+    kinds = [tok.type for tok in filtered]
+    assert kinds == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
+                     'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
+                     'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
+                     'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL',
+                     'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL',
+                     '}', 'ENDL']
+
+
+def test_parser():
+    lexer.lineno = 1
+    lexer.source_filename = "sourcefile"
+    token_filter = TokenFilter(lexer)
+    module = parser.parse(input=test_source, tokenfunc=token_filter.token)
+    assert isinstance(module, Module)
+    assert module.name == "sourcefile"
+    assert module.scope.name == "<sourcefile global scope>"
+    assert module.subroutine_usage == {}
+    assert module.scope.parent_scope is None
+    calculate = module.scope["block.calculate"]     # dotted lookup from the module scope
+    assert isinstance(calculate, Subroutine)
+    assert calculate.name == "calculate"
+    block = module.scope["block"]
+    assert isinstance(block, Block)
+    assert block.name == "block"
+    assert block.address == 0xc000                  # the $c000 given in test_source
+    same_sub = block.scope["calculate"]             # plain lookup from the block's own scope
+    assert same_sub is calculate
+    assert same_sub.lineref == "src l. 18"
+    scopes = list(module.all_scopes())
+    assert len(scopes) == 3
+    assert isinstance(scopes[0][0], Module)
+    assert scopes[0][1] is None
+    assert isinstance(scopes[1][0], Block)
+    assert isinstance(scopes[1][1], Module)
+    assert isinstance(scopes[2][0], Subroutine)
+    assert isinstance(scopes[2][1], Block)
+    returns = list(scopes[2][0].scope.filter_nodes(Return))
+    assert len(returns) == 1
+    assert isinstance(returns[0], Return)
+    assert returns[0].lineref == "src l. 19"
--- a/testsource/conditionals.ill
+++ b/testsource/conditionals.ill
@ -80,9 +80,7 @@ label4:



-; @todo temporarily disabled until comparison operators are properly implemented:
-
-~ {
+~ conditionals {
        var  bytevar = 22 + 23
        var  .text  name        = "?"*80
        var  bytevar2 = 23
--- a/todo.ill
+++ b/todo.ill
@ -9,11 +9,12 @@

                const   num = 2
                var     var1  =2
-                var .word  wvar1 = 2
+                var .word  wvar1 = 2 + foo()    ; @todo constant


 start:

+	wvar1 = 2+foo()

        A=math.randbyte()
        A +=  c64.RASTER
@ -148,6 +149,12 @@ loop   :
        ;return

        A = $11
+        A = $11
+        A = $11
+        X = $11
+        Y = $11
+        X = $11
+        Y = $11
        X = $22
        Y = $33