From b8506ee7d44c76a0efba24f4ce3063cc73370bd6 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Mon, 8 Jan 2018 03:31:23 +0100
Subject: [PATCH] optimize, tests, refactor

---
 README.md                      |   4 +-
 il65/__main__.py               |   3 +-
 il65/compiler.py               |  63 +++++++++---------
 il65/handwritten/codegen.py    |   4 +-
 il65/handwritten/exprparse.py  |   3 +-
 il65/handwritten/optimize.py   |   3 +-
 il65/handwritten/parse.py      |  13 ++--
 il65/handwritten/preprocess.py |   3 +-
 il65/handwritten/symbols.py    |   6 +-
 il65/lib/c64lib.ill            |   5 +-
 il65/lib/il65lib.ill           |   5 +-
 il65/lib/mathlib.ill           |   5 +-
 il65/main.py                   |   5 +-
 il65/optimizer.py              | 114 +++++++++++++++++++++++++++------
 il65/plylexer.py               |   7 +-
 il65/plyparser.py              | 107 ++++++++++++++++++++++++++-----
 il65/symbols.py                |  26 ++++++++
 reference.md                   |   4 +-
 requirements.txt               |   2 +
 tests/test_compiler.py         |   5 ++
 tests/test_core.py             |  20 ++++++
 tests/test_optimizer.py        |   6 ++
 tests/test_parser.py           | 111 ++++++++++++++++++++++++++++++++
 testsource/conditionals.ill    |   4 +-
 todo.ill                       |   9 ++-
 25 files changed, 425 insertions(+), 112 deletions(-)
 create mode 100644 il65/symbols.py
 create mode 100644 requirements.txt
 create mode 100644 tests/test_compiler.py
 create mode 100644 tests/test_core.py
 create mode 100644 tests/test_optimizer.py
 create mode 100644 tests/test_parser.py

diff --git a/README.md b/README.md
index 4db22e28b..4e16c0f1c 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
 =====================================================================================
 
-*Written by Irmen de Jong (irmen@razorvine.net)*
+*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*
 
-*Software license: GNU GPL 3.0, see LICENSE*
+*Software license: GNU GPL 3.0, see file LICENSE*
 
 
 This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's
diff --git a/il65/__main__.py b/il65/__main__.py
index 427977c50..4ea58a929 100644
--- a/il65/__main__.py
+++ b/il65/__main__.py
@@ -1,8 +1,7 @@
 """
 Programming Language for 6502/6510 microprocessors
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 from . import main
diff --git a/il65/compiler.py b/il65/compiler.py
index dc36122bd..50e84d110 100644
--- a/il65/compiler.py
+++ b/il65/compiler.py
@@ -1,30 +1,29 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the compiler of the IL65 code, that prepares the parse tree for code generation.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import re
 import os
 import sys
 import linecache
-from typing import Optional, Tuple, Set, Dict, Any, List
+from typing import Optional, Tuple, Set, Dict, Any, no_type_check
 from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
-    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters
+    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression
 from .plylexer import SourceRef, print_bold
 from .optimizer import optimize
 
 
 class ParseError(Exception):
     def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None:
+        super().__init__(message)
         self.sourceref = sourceref
-        self.msg = message
         self.sourcetext = sourcetext
 
     def __str__(self):
-        return "{} {:s}".format(self.sourceref, self.msg)
+        return "{} {:s}".format(self.sourceref, self.args[0])
 
 
 class PlyParser:
@@ -39,6 +38,7 @@ class PlyParser:
             self.check_directives(module)
             self.process_imports(module)
             self.create_multiassigns(module)
+            self.process_all_expressions(module)
             if not self.parsing_import:
                 self.determine_subroutine_usage(module)
         except ParseError as x:
@@ -52,49 +52,52 @@ class PlyParser:
         self.parse_errors += 1
         print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
 
+    @no_type_check
+    def process_all_expressions(self, module: Module) -> None:
+        # process/simplify all expressions (constant folding etc) of every node in every scope of the module
+        for block, parent in module.all_scopes():
+            if block.scope:
+                for node in block.scope.nodes:
+                    if node is None:    # fail with the diagnostic details instead of an AttributeError on None
+                        raise ValueError("empty node in scope: {} {} {}".format(block, block.scope, block.scope.nodes))
+                    node.process_expressions()
+
+    @no_type_check
     def create_multiassigns(self, module: Module) -> None:
         # create multi-assign statements from nested assignments (A=B=C=5),
         # and optimize TargetRegisters down to single Register if it's just one register.
-        def simplify_targetregisters(targets: List[Any]) -> List[Any]:
-            new_targets = []
-            for t in targets:
-                if isinstance(t, TargetRegisters) and len(t.registers) == 1:
-                    t = t.registers[0]
-                new_targets.append(t)
-            return new_targets
-
         def reduce_right(assign: Assignment) -> Assignment:
             if isinstance(assign.right, Assignment):
                 right = reduce_right(assign.right)
-                targets = simplify_targetregisters(right.left)
-                assign.left.extend(targets)
+                assign.left.extend(right.left)
                 assign.right = right.right
             return assign
 
-        for mnode, parent in module.all_scopes():
-            if mnode.scope:
-                for node in mnode.scope.nodes:
+        for block, parent in module.all_scopes():
+            if block.scope:
+                for node in block.scope.nodes:
                     if isinstance(node, Assignment):
-                        node.left = simplify_targetregisters(node.left)
                         if isinstance(node.right, Assignment):
                             multi = reduce_right(node)
                             assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment)
+                        node.simplify_targetregisters()
 
+    @no_type_check
     def determine_subroutine_usage(self, module: Module) -> None:
         module.subroutine_usage.clear()
-        for mnode, parent in module.all_scopes():
-            if mnode.scope:
-                for node in mnode.scope.nodes:
+        for block, parent in module.all_scopes():
+            if block.scope:
+                for node in block.scope.nodes:
                     if isinstance(node, InlineAssembly):
-                        self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, mnode.scope)
+                        self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, block.scope)
                     elif isinstance(node, SubCall):
-                        self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                     elif isinstance(node, Goto):
-                        self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                     elif isinstance(node, Return):
-                        self._parse_return_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_return_for_subroutine_usages(module.subroutine_usage, node, block.scope)
                     elif isinstance(node, Assignment):
-                        self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, mnode.scope)
+                        self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, block.scope)
 
     def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                              subcall: SubCall, parent_scope: Scope) -> None:
@@ -265,7 +268,7 @@ class PlyParser:
 
 if __name__ == "__main__":
     description = "Compiler for IL65 language, code name 'Sick'"
-    print("\n" + description)
+    print("\n" + description + "\n")
     plyparser = PlyParser()
     m = plyparser.parse_file(sys.argv[1])
     optimize(m)
diff --git a/il65/handwritten/codegen.py b/il65/handwritten/codegen.py
index ed542775d..d61fedaba 100644
--- a/il65/handwritten/codegen.py
+++ b/il65/handwritten/codegen.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the assembly code generator (from the parse tree)
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import io
@@ -259,6 +258,7 @@ class CodeGenerator:
             self.p("\t.pend\n")
 
     def generate_block_vars(self, block: Block) -> None:
+        # @todo block vars should be re-initialized when the program is run again, and not depend on statically prefilled data!
         consts = [c for c in block.symbols.iter_constants()]
         if consts:
             self.p("; constants")
diff --git a/il65/handwritten/exprparse.py b/il65/handwritten/exprparse.py
index b2e2dc43b..421cbfa8b 100644
--- a/il65/handwritten/exprparse.py
+++ b/il65/handwritten/exprparse.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the expression parser/evaluator.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import ast
diff --git a/il65/handwritten/optimize.py b/il65/handwritten/optimize.py
index 4d4c671ec..0368eab4d 100644
--- a/il65/handwritten/optimize.py
+++ b/il65/handwritten/optimize.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the code to optimize the parse tree.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 from typing import List
diff --git a/il65/handwritten/parse.py b/il65/handwritten/parse.py
index 9ced08794..2a5e04067 100644
--- a/il65/handwritten/parse.py
+++ b/il65/handwritten/parse.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the hand-written parser of the IL65 code, that generates a parse tree.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import re
@@ -73,7 +72,7 @@ class Parser:
         if sub_usage is not None:
             # re-use the (global) subroutine usage tracking
             self.result.subroutine_usage = sub_usage
-        self.sourceref = SourceRef(filename, -1, 0)
+        self.sourceref = SourceRef(filename, -1, 0)   # type: ignore
         if sourcelines:
             self.lines = sourcelines
         else:
@@ -234,7 +233,7 @@ class Parser:
     def _parse_2(self) -> None:
         # parsing pass 2 (not done during preprocessing!)
         self.cur_block = None
-        self.sourceref = SourceRef(self.sourceref.file, -1)
+        self.sourceref = SourceRef(self.sourceref.file, -1)   # type: ignore
 
         def imm_string_to_var(stmt: AssignmentStmt, containing_block: Block) -> None:
             if stmt.right.name or not isinstance(stmt.right, StringValue):
@@ -358,7 +357,7 @@ class Parser:
         self._cur_lineidx += 1
         try:
             lineno, line = self.lines[self._cur_lineidx]
-            self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)
+            self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)    # type: ignore
             return line
         except IndexError:
             return ""
@@ -366,7 +365,7 @@ class Parser:
     def prev_line(self) -> str:
         self._cur_lineidx -= 1
         lineno, line = self.lines[self._cur_lineidx]
-        self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)
+        self.sourceref = SourceRef(file=self.sourceref.file, line=lineno)    # type: ignore
         return line
 
     def peek_next_line(self) -> str:
@@ -382,7 +381,7 @@ class Parser:
             if num == lineno:
                 sourceline = text.strip()
                 break
-        return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column))
+        return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column))    # type: ignore
 
     def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]:
         if typestr == ".byte":
diff --git a/il65/handwritten/preprocess.py b/il65/handwritten/preprocess.py
index e11a85c54..985d76c0b 100644
--- a/il65/handwritten/preprocess.py
+++ b/il65/handwritten/preprocess.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 This is the preprocessing parser of the IL65 code, that only generates a symbol table.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 from typing import List, Tuple, Set
diff --git a/il65/handwritten/symbols.py b/il65/handwritten/symbols.py
index 3898a1839..9a158856b 100644
--- a/il65/handwritten/symbols.py
+++ b/il65/handwritten/symbols.py
@@ -2,8 +2,7 @@
 Programming Language for 6502/6510 microprocessors
 Here are the symbol (name) operations such as lookups, datatype definitions.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import inspect
@@ -357,6 +356,9 @@ class SymbolTable:
     def iter_labels(self) -> Iterable[LabelDef]:
         yield from sorted((v for v in self.symbols.values() if isinstance(v, LabelDef)))
 
+    def remove_node(self, name: str) -> None:
+        del self.symbols[name]
+
     def check_identifier_valid(self, name: str, sourceref: SourceRef) -> None:
         if not name.isidentifier():
             raise SymbolError("invalid identifier")
diff --git a/il65/lib/c64lib.ill b/il65/lib/c64lib.ill
index 91eb4c2df..200623d32 100644
--- a/il65/lib/c64lib.ill
+++ b/il65/lib/c64lib.ill
@@ -1,9 +1,8 @@
 ; IL65 definitions for the Commodore-64
 ; Including memory registers, I/O registers, Basic and Kernel subroutines, utility subroutines.
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8
 
 
diff --git a/il65/lib/il65lib.ill b/il65/lib/il65lib.ill
index ba6c8f7aa..dce7b038a 100644
--- a/il65/lib/il65lib.ill
+++ b/il65/lib/il65lib.ill
@@ -1,8 +1,7 @@
 ; IL65 internal library routines
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8
 
 
diff --git a/il65/lib/mathlib.ill b/il65/lib/mathlib.ill
index 74f80617b..fffe77659 100644
--- a/il65/lib/mathlib.ill
+++ b/il65/lib/mathlib.ill
@@ -5,9 +5,8 @@
 ;	http://6502org.wikidot.com/software-math
 ;	http://codebase64.org/doku.php?id=base:6502_6510_maths
 ;
-; Written by Irmen de Jong (irmen@razorvine.net)
-; License: GNU GPL 3.0, see LICENSE
-;
+; Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+;
 ; indent format: TABS, size=8
 
 
diff --git a/il65/main.py b/il65/main.py
index 970cf3fbf..e770a24d0 100644
--- a/il65/main.py
+++ b/il65/main.py
@@ -1,11 +1,8 @@
-#! /usr/bin/env python3
-
 """
 Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the main program that drives the rest.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import time
diff --git a/il65/optimizer.py b/il65/optimizer.py
index 660dff0b1..552bbd1dc 100644
--- a/il65/optimizer.py
+++ b/il65/optimizer.py
@@ -1,12 +1,12 @@
 """
-Programming Language for 6502/6510 microprocessors
-This is the code to optimize the parse tree.
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+This is the optimizer that applies various optimizations to the parse tree.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
-from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment
+from typing import no_type_check
+from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression
 from .plylexer import print_warning, print_bold
 
 
@@ -17,31 +17,70 @@ class Optimizer:
 
     def optimize(self) -> None:
         self.num_warnings = 0
-        # self.remove_augmentedassign_incrdecr_nops(block)   # @todo
         self.remove_useless_assigns()
-        # self.combine_assignments_into_multi(block)   # @todo
+        self.combine_assignments_into_multi()
         self.optimize_multiassigns()
         self.remove_unused_subroutines()
-        # self.optimize_compare_with_zero(block)  # @todo
+        self.optimize_compare_with_zero()
         self.remove_empty_blocks()
 
-    def remove_useless_assigns(self) -> None:
+    def remove_useless_assigns(self):
         # remove assignment statements that do nothing (A=A)
-        for mnode, parent in self.module.all_scopes():
-            if mnode.scope:
-                for assignment in list(mnode.scope.nodes):
+        # and augmented assignments that have no effect (A+=0)
+        # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for assignment in list(block.scope.nodes):
                     if isinstance(assignment, Assignment):
                         assignment.left = [lv for lv in assignment.left if lv != assignment.right]
                         if not assignment.left:
-                            mnode.scope.remove_node(assignment)
+                            block.scope.remove_node(assignment)
                             self.num_warnings += 1
-                            print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref))
+                            print_warning("{}: removed statement that has no effect".format(assignment.sourceref))
+                    if isinstance(assignment, AugAssignment):
+                        if isinstance(assignment.right, (int, float)):
+                            if assignment.right == 0 and assignment.operator in ("+=", "-=", "|=", "<<=", ">>=", "^="):
+                                self.num_warnings += 1
+                                print_warning("{}: removed statement that has no effect".format(assignment.sourceref))
+                                block.scope.remove_node(assignment)
+                            if assignment.right >= 8 and assignment.operator in ("<<=", ">>="):
+                                self.num_warnings += 1
+                                print_warning("{}: shifting result is always zero".format(assignment.sourceref))
+                                new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref)
+                                block.scope.replace_node(assignment, new_stmt)
 
-    def optimize_multiassigns(self) -> None:
+    def combine_assignments_into_multi(self):
+        # fold multiple consecutive assignments with the same rvalue into one multi-assignment
+        # only directly adjacent statements are joined, so the order of evaluation is preserved
+        def flush(scope, run):
+            # merge a finished run into its first assignment (a run of one is left as it is)
+            for other in run[1:]:
+                print("{}: joined with previous assignment".format(other.sourceref))
+                run[0].left.extend(other.left)
+                scope.remove_node(other)
+            run.clear()
+
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                # the run of adjacent assignments seen so far; they all share assignments[0].right
+                assignments = []
+                for stmt in list(block.scope.nodes):
+                    if isinstance(stmt, Assignment):
+                        if assignments and not (stmt.right == assignments[0].right):
+                            # different rvalue: the pending run ends here and this statement starts a new one
+                            flush(block.scope, assignments)
+                        assignments.append(stmt)
+                    else:
+                        # any other kind of statement ends the pending run
+                        flush(block.scope, assignments)
+                # the scope can end while a run is still pending
+                flush(block.scope, assignments)
+
+    def optimize_multiassigns(self):
         # optimize multi-assign statements (remove duplicate targets, optimize order)
-        for mnode, parent in self.module.all_scopes():
-            if mnode.scope:
-                for assignment in mnode.scope.nodes:
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for assignment in block.scope.nodes:
                     if isinstance(assignment, Assignment) and len(assignment.left) > 1:
                         # remove duplicates
                         lvalues = set(assignment.left)
@@ -51,7 +90,7 @@ class Optimizer:
                         # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)
                         assignment.left = list(lvalues)
 
-    def remove_unused_subroutines(self) -> None:
+    def remove_unused_subroutines(self):
         # some symbols are used by the emitted assembly code from the code generator,
         # and should never be removed or the assembler will fail
         never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB",
@@ -66,6 +105,39 @@ class Optimizer:
                     num_discarded += 1
         print("discarded {:d} unused subroutines".format(num_discarded))
 
+    def optimize_compare_with_zero(self):
+        # a conditional goto that compares a value with zero will be simplified
+        # the comparison operator and rvalue (0) will be removed and the if-status changed accordingly
+        for block, parent in self.module.all_scopes():
+            if block.scope:
+                for stmt in block.scope.filter_nodes(Goto):
+                    if isinstance(stmt.condition, Expression):
+                        raise NotImplementedError("optimize goto conditionals", stmt.condition)   # @todo
+                        # if cond and isinstance(cond.rvalue, (int, float)) and cond.rvalue.value == 0:
+                        #     simplified = False
+                        #     if cond.ifstatus in ("true", "ne"):
+                        #         if cond.comparison_op == "==":
+                        #             # if_true something == 0   ->  if_not something
+                        #             cond.ifstatus = "not"
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #         elif cond.comparison_op == "!=":
+                        #             # if_true something != 0  -> if_true something
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #     elif cond.ifstatus in ("not", "eq"):
+                        #         if cond.comparison_op == "==":
+                        #             # if_not something == 0   ->  if_true something
+                        #             cond.ifstatus = "true"
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #         elif cond.comparison_op == "!=":
+                        #             # if_not something != 0  -> if_not something
+                        #             cond.comparison_op, cond.rvalue = "", None
+                        #             simplified = True
+                        #     if simplified:
+                        #         print("{}: simplified comparison with zero".format(stmt.sourceref))
+
     def remove_empty_blocks(self) -> None:
         # remove blocks without name and without address, or that are empty
         for node, parent in self.module.all_scopes():
@@ -94,6 +166,6 @@ def optimize(mod: Module) -> None:
     opt.optimize()
     if opt.num_warnings:
         if opt.num_warnings == 1:
-            print_bold("there is one optimization warning.")
+            print_bold("\nThere is one optimization warning.\n")
         else:
-            print_bold("there are {:d} optimization warnings.".format(opt.num_warnings))
+            print_bold("\nThere are {:d} optimization warnings.\n".format(opt.num_warnings))
diff --git a/il65/plylexer.py b/il65/plylexer.py
index 283fab27d..eb0e47ea8 100644
--- a/il65/plylexer.py
+++ b/il65/plylexer.py
@@ -1,9 +1,8 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the lexer of the IL65 code, that generates a stream of tokens for the parser.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 import sys
@@ -338,5 +337,3 @@ lexer = ply.lex.lex()
 
 if __name__ == "__main__":
     ply.lex.runmain()
-    # lexer = ply.lex.Lexer()
-    # ply.lex.runmain(lexer=lexer)
diff --git a/il65/plyparser.py b/il65/plyparser.py
index 06d5a4293..5b4775263 100644
--- a/il65/plyparser.py
+++ b/il65/plyparser.py
@@ -1,16 +1,16 @@
 """
-Programming Language for 6502/6510 microprocessors
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
 This is the parser of the IL65 code, that generates a parse tree.
 
-Written by Irmen de Jong (irmen@razorvine.net)
-License: GNU GPL 3.0, see LICENSE
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """
 
 from collections import defaultdict
+from typing import Union, Generator, Tuple, List
 import attr
 from ply.yacc import yacc
-from typing import Union, Generator, Tuple, List
 from .plylexer import SourceRef, tokens, lexer, find_tok_column
+from .symbols import DataType
 
 
 start = "start"
@@ -47,6 +47,11 @@ class AstNode:
                                 tostr(elt, level + 2)
         tostr(self, 0)
 
+    def process_expressions(self) -> None:
+        # process/simplify all expressions (constant folding etc)   @todo
+        # override in node types that have expression(s)
+        pass
+
 
 @attr.s(cmp=False, repr=False)
 class Directive(AstNode):
@@ -66,11 +71,12 @@ class Scope(AstNode):
         # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"]
         self.symbols = {}
         for node in self.nodes:
+            assert isinstance(node, AstNode)
             if isinstance(node, (Label, VarDef)):
                 self.symbols[node.name] = node
             if isinstance(node, Subroutine):
                 self.symbols[node.name] = node
-                if node.scope is not None:
+                if node.scope:
                     node.scope.parent_scope = self
             if isinstance(node, Block):
                 if node.name:
@@ -89,7 +95,7 @@ class Scope(AstNode):
                 if not isinstance(scope, Scope):
                     raise LookupError("undefined symbol: " + name)
                 scope = scope.symbols.get(namepart, None)
-                if scope is None:
+                if not scope:
                     raise LookupError("undefined symbol: " + name)
             return scope
         else:
@@ -110,6 +116,13 @@ class Scope(AstNode):
             del self.symbols[node.name]
         self.nodes.remove(node)
 
+    def replace_node(self, oldnode: AstNode, newnode: AstNode) -> None:
+        # put newnode in oldnode's position; oldnode's name (if it has one) is dropped from the symbols, newnode is not added
+        assert isinstance(newnode, AstNode)
+        self.nodes[self.nodes.index(oldnode)] = newnode
+        if hasattr(oldnode, "name"):
+            del self.symbols[oldnode.name]
+
 
 @attr.s(cmp=False, repr=False)
 class Module(AstNode):
@@ -171,6 +184,18 @@ class Assignment(AstNode):
     left = attr.ib(type=list)     # type: List[Union[str, TargetRegisters, Dereference]]
     right = attr.ib()
 
+    def __attrs_post_init__(self):
+        self.simplify_targetregisters()
+
+    def simplify_targetregisters(self) -> None:
+        # optimize TargetRegisters down to single Register if it's just one register
+        new_targets = []
+        for t in self.left:
+            if isinstance(t, TargetRegisters) and len(t.registers) == 1:
+                t = t.registers[0]
+            new_targets.append(t)
+        self.left = new_targets
+
 
 @attr.s(cmp=False, repr=False)
 class AugAssignment(AstNode):
@@ -215,12 +240,41 @@ class VarDef(AstNode):
     vartype = attr.ib()
     datatype = attr.ib()
     value = attr.ib(default=None)
+    size = attr.ib(type=int, default=None)
+
+    def __attrs_post_init__(self):
+        # convert datatype node to enum + size
+        if self.datatype is None:
+            assert self.size is None
+            self.size = 1
+            self.datatype = DataType.BYTE
+        elif isinstance(self.datatype, DatatypeNode):
+            assert self.size is None
+            self.size = self.datatype.dimensions
+            self.datatype = self.datatype.to_enum()
+        # if the value is an expression, mark it as a *constant* expression here
+        if isinstance(self.value, Expression):
+            self.value.processed_must_be_constant = True
 
 
 @attr.s(cmp=False, slots=True, repr=False)
-class Datatype(AstNode):
+class DatatypeNode(AstNode):
     name = attr.ib(type=str)
-    dimension = attr.ib(type=list, default=None)
+    dimensions = attr.ib(type=list, default=None)    # if set, 1 or more dimensions (ints)
+
+    def to_enum(self):
+        return {
+            "byte": DataType.BYTE,
+            "word": DataType.WORD,
+            "float": DataType.FLOAT,
+            "text": DataType.STRING,
+            "ptext": DataType.STRING_P,
+            "stext": DataType.STRING_S,
+            "pstext": DataType.STRING_PS,
+            "matrix": DataType.MATRIX,
+            "array": DataType.BYTEARRAY,
+            "wordarray": DataType.WORDARRAY
+        }[self.name]
 
 
 @attr.s(cmp=False, repr=False)
@@ -232,9 +286,9 @@ class Subroutine(AstNode):
     address = attr.ib(type=int, default=None)
 
     def __attrs_post_init__(self):
-        if self.scope is not None and self.address is not None:
+        if self.scope and self.address is not None:
             raise ValueError("subroutine must have either a scope or an address, not both")
-        if self.scope is not None:
+        if self.scope:
             self.scope.name = self.name
 
 
@@ -249,6 +303,18 @@ class Goto(AstNode):
 class Dereference(AstNode):
     location = attr.ib()
     datatype = attr.ib()
+    size = attr.ib(type=int, default=None)
+
+    def __attrs_post_init__(self):
+        # convert datatype node to enum + size
+        if self.datatype is None:
+            assert self.size is None
+            self.size = 1
+            self.datatype = DataType.BYTE
+        elif isinstance(self.datatype, DatatypeNode):
+            assert self.size is None
+            self.size = self.datatype.dimensions
+            self.datatype = self.datatype.to_enum()
 
 
 @attr.s(cmp=False, slots=True, repr=False)
@@ -274,6 +340,9 @@ class Expression(AstNode):
     left = attr.ib()
     operator = attr.ib(type=str)
     right = attr.ib()
+    processed_must_be_constant = attr.ib(type=bool, init=False, default=False)     # does the expression have to be a constant value?
+    processed = attr.ib(type=bool, init=False, default=False)    # has this expression been processed/simplified yet?
+    constant = attr.ib(type=bool, init=False, default=False)     # is the processed expression a constant value?
 
 
 def p_start(p):
@@ -297,9 +366,15 @@ def p_module(p):
                     |  module_elements  module_elt
     """
     if len(p) == 2:
-        p[0] = [p[1]]
+        if p[1] is None:
+            p[0] = []
+        else:
+            p[0] = [p[1]]
     else:
-        p[0] = p[1] + [p[2]]
+        if p[2] is None:
+            p[0] = p[1]
+        else:
+            p[0] = p[1] + [p[2]]
 
 
 def p_module_elt(p):
@@ -377,7 +452,7 @@ def p_scope(p):
     """
     scope :  '{'  scope_elements_opt  '}'
     """
-    p[0] = Scope(nodes=p[2], sourceref=_token_sref(p, 1))
+    p[0] = Scope(nodes=p[2] or [], sourceref=_token_sref(p, 1))
 
 
 def p_scope_elements_opt(p):
@@ -453,9 +528,9 @@ def p_type_opt(p):
              |  empty
     """
     if len(p) == 5:
-        p[0] = Datatype(name=p[1], dimension=p[3], sourceref=_token_sref(p, 1))
-    elif len(p) == 2:
-        p[0] = Datatype(name=p[1], sourceref=_token_sref(p, 1))
+        p[0] = DatatypeNode(name=p[1], dimensions=p[3], sourceref=_token_sref(p, 1))
+    elif len(p) == 2 and p[1]:
+        p[0] = DatatypeNode(name=p[1], sourceref=_token_sref(p, 1))
 
 
 def p_dimensions(p):
diff --git a/il65/symbols.py b/il65/symbols.py
new file mode 100644
index 000000000..f1c85c8a6
--- /dev/null
+++ b/il65/symbols.py
@@ -0,0 +1,26 @@
+"""
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+This module holds the symbol (name) operations, such as lookups and datatype definitions.
+
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+"""
+
+
+import enum
+
+
+class DataType(enum.Enum):
+    """The possible data types of values"""
+    BYTE = 1
+    WORD = 2
+    FLOAT = 3
+    BYTEARRAY = 4
+    WORDARRAY = 5
+    MATRIX = 6
+    STRING = 7
+    STRING_P = 8
+    STRING_S = 9
+    STRING_PS = 10
+
+
+STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS}
diff --git a/reference.md b/reference.md
index ec0982860..a7ee32150 100644
--- a/reference.md
+++ b/reference.md
@@ -1,9 +1,9 @@
 IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
 =====================================================================================
 
-*Written by Irmen de Jong (irmen@razorvine.net)*
+*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*
 
-*Software license: GNU GPL 3.0, see LICENSE*
+*Software license: GNU GPL 3.0, see file LICENSE*
 
 
 This is an experimental programming language for the 8-bit 6502/6510 microprocessor from the late 1970's and 1980's
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 000000000..a8497ebc9
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+attrs
+ply
diff --git a/tests/test_compiler.py b/tests/test_compiler.py
new file mode 100644
index 000000000..b3676330d
--- /dev/null
+++ b/tests/test_compiler.py
@@ -0,0 +1,5 @@
+from il65.compiler import PlyParser
+
+
+def test_compiler():
+    pass  # @todo
diff --git a/tests/test_core.py b/tests/test_core.py
new file mode 100644
index 000000000..9db3136c5
--- /dev/null
+++ b/tests/test_core.py
@@ -0,0 +1,20 @@
+from il65.symbols import DataType, STRING_DATATYPES
+from il65.compiler import ParseError
+from il65.plylexer import SourceRef
+
+
+def test_datatypes():
+    assert all(isinstance(s, DataType) for s in STRING_DATATYPES)
+
+
+def test_sourceref():
+    s = SourceRef("file", 99, 42)
+    assert str(s) == "file:99:42"
+    s = SourceRef("file", 99)
+    assert str(s) == "file:99"
+
+
+def test_parseerror():
+    p = ParseError("message", "source code", SourceRef("filename", 99, 42))
+    assert p.args == ("message", )
+    assert str(p) == "filename:99:42 message"
diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py
new file mode 100644
index 000000000..e40175b10
--- /dev/null
+++ b/tests/test_optimizer.py
@@ -0,0 +1,6 @@
+from il65.optimizer import Optimizer
+
+
+def test_optimizer():
+    pass  # @todo
+
diff --git a/tests/test_parser.py b/tests/test_parser.py
new file mode 100644
index 000000000..d1307179b
--- /dev/null
+++ b/tests/test_parser.py
@@ -0,0 +1,111 @@
+from il65.plylexer import lexer, tokens, find_tok_column, literals, reserved
+from il65.plyparser import parser, TokenFilter, Module, Subroutine, Block, Return
+
+
+def test_lexer_definitions():
+    assert "ENDL" in tokens
+    assert "GOTO" in tokens
+    assert '+' in literals
+    assert ';' not in literals
+    assert "return" in reserved
+    assert "sub" in reserved
+    assert "A" in reserved
+    assert "if_cc" in reserved
+
+
+test_source = """ %output prg, sys
+
+; c1
+
+; c2
+
+
+~ block $c000 {
+         %import a,b
+
+
+    ; comment
+
+    var .matrix(20,30) m = 9.234556
+    ;comment2
+
+
+    sub calculate () -> () {
+        return 
+    }
+    
+    ;z
+    
+}
+"""
+
+def test_lexer():
+    lexer.input(test_source)
+    lexer.lineno = 1
+    tokens = list(iter(lexer))
+    token_types = list(t.type for t in tokens)
+    assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', 'ENDL',
+                           'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
+                           'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL',
+                           'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'ENDL',
+                           'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', 'ENDL', 'ENDL', 'ENDL',
+                           '}', 'ENDL']
+    directive_token = tokens[12]
+    assert directive_token.type == "DIRECTIVE"
+    assert directive_token.value == "import"
+    assert directive_token.lineno == 9
+    assert directive_token.lexpos == lexer.lexdata.index("%import")
+    assert find_tok_column(directive_token) == 10
+
+
+def test_tokenfilter():
+    lexer.input(test_source)
+    lexer.lineno = 1
+    filter = TokenFilter(lexer)
+    tokens = []
+    while True:
+        token = filter.token()
+        if not token:
+            break
+        tokens.append(token)
+    token_types = list(t.type for t in tokens)
+    assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
+                           'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
+                           'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
+                           'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL',
+                           'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL',
+                           '}', 'ENDL']
+
+
+def test_parser():
+    lexer.lineno = 1
+    lexer.source_filename = "sourcefile"
+    filter = TokenFilter(lexer)
+    result = parser.parse(input=test_source, tokenfunc=filter.token)
+    assert isinstance(result, Module)
+    assert result.name == "sourcefile"
+    assert result.scope.name == "<sourcefile global scope>"
+    assert result.subroutine_usage == {}
+    assert result.scope.parent_scope is None
+    sub = result.scope["block.calculate"]
+    assert isinstance(sub, Subroutine)
+    assert sub.name == "calculate"
+    block = result.scope["block"]
+    assert isinstance(block, Block)
+    assert block.name == "block"
+    assert block.address == 49152
+    sub2 = block.scope["calculate"]
+    assert sub2 is sub
+    assert sub2.lineref == "src l. 18"
+    all_scopes = list(result.all_scopes())
+    assert len(all_scopes) == 3
+    assert isinstance(all_scopes[0][0], Module)
+    assert all_scopes[0][1] is None
+    assert isinstance(all_scopes[1][0], Block)
+    assert isinstance(all_scopes[1][1], Module)
+    assert isinstance(all_scopes[2][0], Subroutine)
+    assert isinstance(all_scopes[2][1], Block)
+    stmt = list(all_scopes[2][0].scope.filter_nodes(Return))
+    assert len(stmt) == 1
+    assert isinstance(stmt[0], Return)
+    assert stmt[0].lineref == "src l. 19"
diff --git a/testsource/conditionals.ill b/testsource/conditionals.ill
index d91013e65..6aa1b3991 100644
--- a/testsource/conditionals.ill
+++ b/testsource/conditionals.ill
@@ -80,9 +80,7 @@ label4:
 
 
 
-; @todo temporarily disabled until comparison operators are properly implemented:
-
-~ {
+~ conditionals {
         var  bytevar = 22 + 23
         var  .text  name        = "?"*80
         var  bytevar2 = 23
diff --git a/todo.ill b/todo.ill
index 4f8a5a60e..2b1f930fd 100644
--- a/todo.ill
+++ b/todo.ill
@@ -9,11 +9,12 @@
 
                 const   num = 2
                 var     var1  =2
-                var .word  wvar1 = 2
+                var .word  wvar1 = 2 + foo()    ; @todo constant
 
 
 start:
 
+	wvar1 = 2+foo()
 
         A=math.randbyte()
         A +=  c64.RASTER
@@ -148,6 +149,12 @@ loop   :
         ;return
 
         A = $11
+        A = $11
+        A = $11
+        X = $11
+        Y = $11
+        X = $11
+        Y = $11
         X = $22
         Y = $33