code generation v2 started

2024-12-24 16:29:21 +00:00 · 2018-01-09 02:40:32 +01:00 · 2018-01-09 02:40:32 +01:00 · 14e36a8708
commit 14e36a8708
parent b8506ee7d4
14 changed files with 551 additions and 104 deletions
--- a/il65/compiler.py
+++ b/il65/compiler.py
@ -10,20 +10,11 @@ import os
 import sys
 import linecache
 from typing import Optional, Tuple, Set, Dict, Any, no_type_check
-from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
-    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression
-from .plylexer import SourceRef, print_bold
-from .optimizer import optimize
-
-
-class ParseError(Exception):
-    def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None:
-        super().__init__(message)
-        self.sourceref = sourceref
-        self.sourcetext = sourcetext
-
-    def __str__(self):
-        return "{} {:s}".format(self.sourceref, self.args[0])
+import attr
+from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, \
+    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions
+from .plylex import SourceRef, print_bold
+from .optimize import optimize


 class PlyParser:
@ -33,14 +24,18 @@ class PlyParser:

    def parse_file(self, filename: str) -> Module:
        print("parsing:", filename)
-        module = parse_file(filename, self.lexer_error)
+        module = None
        try:
+            module = parse_file(filename, self.lexer_error)
            self.check_directives(module)
            self.process_imports(module)
            self.create_multiassigns(module)
            self.process_all_expressions(module)
            if not self.parsing_import:
+                # these shall only be done on the main module after all imports have been done:
+                self.apply_directive_options(module)
                self.determine_subroutine_usage(module)
+                self.check_and_merge_zeropages(module)
        except ParseError as x:
            self.handle_parse_error(x)
        if self.parse_errors:
@ -52,6 +47,27 @@ class PlyParser:
        self.parse_errors += 1
        print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))

+    def check_and_merge_zeropages(self, module: Module) -> None:
+        """Merge every block named "ZP" in the module scope into the first one found.
+
+        Directives of later ZP blocks are inserted at the front of the first
+        block's scope, variable definitions are appended to it.  Any other node
+        in a later ZP block raises ParseError.  All ZP blocks are removed from
+        the module scope and the merged block is re-inserted as the first node.
+        """
+        # merge all ZP blocks into one
+        zeropage = None
+        # iterate over a copy because blocks are removed from the scope inside the loop
+        for block in list(module.scope.filter_nodes(Block)):
+            if block.name == "ZP":
+                if zeropage:
+                    # merge other ZP block into first ZP block
+                    for node in block.scope.nodes:
+                        if isinstance(node, Directive):
+                            # NOTE(review): each directive is inserted at index 0, so several
+                            # directives from one block end up in reverse order - confirm this is intended
+                            zeropage.scope.add_node(node, 0)
+                        elif isinstance(node, VarDef):
+                            zeropage.scope.add_node(node)
+                        else:
+                            raise ParseError("only variables and directives allowed in zeropage block", node.sourceref)
+                else:
+                    # NOTE(review): the nodes of this first ZP block are not checked here
+                    zeropage = block
+                module.scope.remove_node(block)
+        if zeropage:
+            # add the zero page again, as the very first block
+            module.scope.add_node(zeropage, 0)
+
    @no_type_check
    def process_all_expressions(self, module: Module) -> None:
        # process/simplify all expressions (constant folding etc)
@ -82,6 +98,82 @@ class PlyParser:
                            assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment)
                        node.simplify_targetregisters()

+    def apply_directive_options(self, module: Module) -> None:
+        """Apply the options given by directives to the module, blocks and subroutines.
+
+        Walks everything yielded by ``module.all_scopes()``.  Module-level
+        directives set the output format, load address, zeropage options and the
+        save-registers flag; block and subroutine directives only set the
+        save-registers flag.  Raises ParseError for invalid directive arguments
+        and NotImplementedError for a directive name that is not handled here.
+        """
+        def set_save_registers(scope: Scope, save_dir: Directive) -> None:
+            # a missing scope is ignored; a directive without argument means 'yes'
+            if not scope:
+                return
+            if len(save_dir.args) > 1:
+                raise ParseError("need zero or one directive argument", save_dir.sourceref)
+            if save_dir.args:
+                if save_dir.args[0] in ("yes", "true"):
+                    scope.save_registers = True
+                elif save_dir.args[0] in ("no", "false"):
+                    scope.save_registers = False
+                else:
+                    raise ParseError("invalid directive args", save_dir.sourceref)
+            else:
+                scope.save_registers = True
+
+        for block, parent in module.all_scopes():
+            if isinstance(block, Module):
+                # process the module's directives
+                for directive in block.scope.filter_nodes(Directive):
+                    if directive.name == "output":
+                        if len(directive.args) != 1 or not isinstance(directive.args[0], str):
+                            raise ParseError("need one str directive argument", directive.sourceref)
+                        # every output format also sets its default load address
+                        if directive.args[0] == "raw":
+                            block.format = ProgramFormat.RAW
+                            block.address = 0xc000
+                        elif directive.args[0] == "prg":
+                            block.format = ProgramFormat.PRG
+                            block.address = 0x0801
+                        elif directive.args[0] == "basic":
+                            block.format = ProgramFormat.BASIC
+                            block.address = 0x0801
+                        else:
+                            raise ParseError("invalid directive args", directive.sourceref)
+                    elif directive.name == "address":
+                        if len(directive.args) != 1 or not isinstance(directive.args[0], int):
+                            raise ParseError("need one integer directive argument", directive.sourceref)
+                        if block.format == ProgramFormat.BASIC:
+                            raise ParseError("basic cannot have a custom load address", directive.sourceref)
+                        block.address = directive.args[0]
+                        attr.validate(block)
+                    elif directive.name == "import":
+                        # exact comparison: 'in "import"' was a substring test that also
+                        # matched names such as "", "imp" or "port"
+                        pass   # is processed earlier
+                    elif directive.name == "zp":
+                        if len(directive.args) not in (1, 2) or set(directive.args) - {"clobber", "restore"}:
+                            raise ParseError("invalid directive args", directive.sourceref)
+                        if "clobber" in directive.args and "restore" in directive.args:
+                            module.zp_options = ZpOptions.CLOBBER_RESTORE
+                        elif "clobber" in directive.args:
+                            module.zp_options = ZpOptions.CLOBBER
+                        elif "restore" in directive.args:
+                            # 'restore' is only accepted together with 'clobber'
+                            raise ParseError("invalid directive args", directive.sourceref)
+                    elif directive.name == "saveregisters":
+                        set_save_registers(block.scope, directive)
+                    else:
+                        raise NotImplementedError(directive.name)
+            elif isinstance(block, Block):
+                # process the block's directives
+                for directive in block.scope.filter_nodes(Directive):
+                    if directive.name == "saveregisters":
+                        set_save_registers(block.scope, directive)
+                    elif directive.name in ("breakpoint", "asmbinary", "asminclude"):
+                        continue
+                    else:
+                        raise NotImplementedError(directive.name)
+            elif isinstance(block, Subroutine):
+                if block.scope:
+                    # process the sub's directives
+                    for directive in block.scope.filter_nodes(Directive):
+                        if directive.name == "saveregisters":
+                            set_save_registers(block.scope, directive)
+                        elif directive.name in ("breakpoint", "asmbinary", "asminclude"):
+                            continue
+                        else:
+                            raise NotImplementedError(directive.name)
+
    @no_type_check
    def determine_subroutine_usage(self, module: Module) -> None:
        module.subroutine_usage.clear()
@ -177,10 +269,10 @@ class PlyParser:
                imports = set()  # type: Set[str]
                for directive in node.scope.filter_nodes(Directive):
                    if directive.name not in {"output", "zp", "address", "import", "saveregisters"}:
-                        raise ParseError("invalid directive in module", None, directive.sourceref)
+                        raise ParseError("invalid directive in module", directive.sourceref)
                    if directive.name == "import":
                        if imports & set(directive.args):
-                            raise ParseError("duplicate import", None, directive.sourceref)
+                            raise ParseError("duplicate import", directive.sourceref)
                        imports |= set(directive.args)
            if isinstance(node, (Block, Subroutine)):
                # check block and subroutine-level directives
@ -190,9 +282,9 @@ class PlyParser:
                for sub_node in node.scope.nodes:
                    if isinstance(sub_node, Directive):
                        if sub_node.name not in {"asmbinary", "asminclude", "breakpoint", "saveregisters"}:
-                            raise ParseError("invalid directive in " + node.__class__.__name__.lower(), None, sub_node.sourceref)
+                            raise ParseError("invalid directive in " + node.__class__.__name__.lower(), sub_node.sourceref)
                        if sub_node.name == "saveregisters" and not first_node:
-                            raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref)
+                            raise ParseError("saveregisters directive should be the first", sub_node.sourceref)
                    first_node = False

    def process_imports(self, module: Module) -> None:
@ -201,11 +293,11 @@ class PlyParser:
        for directive in module.scope.filter_nodes(Directive):
            if directive.name == "import":
                if len(directive.args) < 1:
-                    raise ParseError("missing argument(s) for import directive", None, directive.sourceref)
+                    raise ParseError("missing argument(s) for import directive", directive.sourceref)
                for arg in directive.args:
                    filename = self.find_import_file(arg, directive.sourceref.file)
                    if not filename:
-                        raise ParseError("imported file not found", None, directive.sourceref)
+                        raise ParseError("imported file not found", directive.sourceref)
                    imported_module, import_parse_errors = self.import_file(filename)
                    imported_module.scope.parent_scope = module.scope
                    imported.append(imported_module)
@ -252,16 +344,11 @@ class PlyParser:
            print("Error (in imported file):", str(exc), file=sys.stderr)
        else:
            print("Error:", str(exc), file=sys.stderr)
-        if exc.sourcetext is None:
-            exc.sourcetext = linecache.getline(exc.sourceref.file, exc.sourceref.line).rstrip()
-        if exc.sourcetext:
-            # remove leading whitespace
-            stripped = exc.sourcetext.lstrip()
-            num_spaces = len(exc.sourcetext) - len(stripped)
-            stripped = stripped.rstrip()
-            print("  " + stripped, file=sys.stderr)
+        sourcetext = linecache.getline(exc.sourceref.file, exc.sourceref.line).rstrip()
+        if sourcetext:
+            print("  " + sourcetext.expandtabs(1), file=sys.stderr)
            if exc.sourceref.column:
-                print("  " + ' ' * (exc.sourceref.column - num_spaces) + '^', file=sys.stderr)
+                print(' ' * (1+exc.sourceref.column) + '^', file=sys.stderr)
        if sys.stderr.isatty():
            print("\x1b[0m", file=sys.stderr, end="", flush=True)

--- a/il65/generateasm.py
+++ b/il65/generateasm.py
@ -0,0 +1,194 @@
+"""
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+This is the assembly code generator (from the parse tree)
+
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+"""
+
+import io
+import re
+import subprocess
+import datetime
+from typing import Union
+from .plyparse import Module, ProgramFormat, Block, Directive, VarDef, Label, ZpOptions, DataType
+from .symbols import to_hex
+
+
+class CodeError(Exception):
+    """Raised by the assembly generator when the parse tree cannot be turned into code."""
+    pass
+
+
+class AssemblyGenerator:
+    """Generates assembly source text (64tass syntax) for a parsed module.
+
+    The text is collected in an in-memory buffer and written to a file by generate().
+    """
+    BREAKPOINT_COMMENT_SIGNATURE = "~~~BREAKPOINT~~~"
+    BREAKPOINT_COMMENT_DETECTOR = r".(?P<address>\w+)\s+ea\s+nop\s+;\s+{:s}.*".format(BREAKPOINT_COMMENT_SIGNATURE)
+
+    def __init__(self, module: Module) -> None:
+        self.module = module
+        self.generated_code = io.StringIO()
+
+    def p(self, text, *args, **vargs):
+        """Print a line to the code buffer (same arguments as print, except 'file')."""
+        # replace '\v' (vertical tab) char by the actual line indent (2 tabs) and write to the stringIo
+        print(text.replace("\v", "\t\t"), *args, file=self.generated_code, **vargs)
+
+    def generate(self, filename: str) -> None:
+        """Generate the assembly code, write it to the given file and close the buffer."""
+        self._generate()
+        with open(filename, "wt") as out:
+            out.write(self.generated_code.getvalue())
+        self.generated_code.close()
+
+    def _generate(self) -> None:
+        self.sanitycheck()
+        self.header()
+        self.initialize_variables()
+        self.blocks()
+        self.footer()
+
+    def sanitycheck(self):
+        """Raise CodeError for duplicate block names or a zeropage block that contains code."""
+        # duplicate block names?
+        all_blocknames = [b.name for b in self.module.scope.filter_nodes(Block)]
+        unique_blocknames = set(all_blocknames)
+        if len(all_blocknames) != len(unique_blocknames):
+            # remove one occurrence of every name, what remains are the duplicates
+            for name in unique_blocknames:
+                all_blocknames.remove(name)
+            raise CodeError("there are duplicate block names", all_blocknames)
+        zpblock = self.module.zeropage()
+        if zpblock:
+            # ZP block contains no code?
+            for stmt in zpblock.scope.nodes:
+                if not isinstance(stmt, (Directive, VarDef)):
+                    raise CodeError("ZP block can only contain directive and var")
+
+    def header(self):
+        """Write the comment header, the cpu/encoding setup and the load address section."""
+        self.p("; code generated by il65.py - codename 'Sick'")
+        self.p("; source file:", self.module.sourceref.file)
+        self.p("; compiled on:", datetime.datetime.now())
+        self.p("; output options:", self.module.format, self.module.zp_options)
+        self.p("; assembler syntax is for the 64tasm cross-assembler")
+        self.p("\n.cpu  '6502'\n.enc  'none'\n")
+        assert self.module.address is not None
+        if self.module.format in (ProgramFormat.PRG, ProgramFormat.BASIC):
+            if self.module.format == ProgramFormat.BASIC:
+                if self.module.address != 0x0801:
+                    raise CodeError("BASIC output mode must have load address $0801")
+                self.p("; ---- basic program with sys call ----")
+                self.p("* = " + to_hex(self.module.address))
+                year = datetime.datetime.now().year
+                self.p("\v.word  (+), {:d}".format(year))
+                self.p("\v.null  $9e, format(' %d ', _il65_sysaddr), $3a, $8f, ' il65 by idj'")
+                self.p("+\v.word  0")
+                self.p("_il65_sysaddr\v; assembly code starts here\n")
+            else:
+                self.p("; ---- program without sys call ----")
+                self.p("* = " + to_hex(self.module.address) + "\n")
+        elif self.module.format == ProgramFormat.RAW:
+            self.p("; ---- raw assembler program ----")
+            self.p("* = " + to_hex(self.module.address) + "\n")
+
+    def initialize_variables(self) -> None:
+        """Write the code that initializes the zeropage variables and jumps to the user code.
+
+        With ZpOptions.CLOBBER_RESTORE the zeropage is saved first and restored
+        after the user code returns.
+        """
+        if self.module.zp_options == ZpOptions.CLOBBER_RESTORE:
+            self.p("\vjsr  il65_lib_zp.save_zeropage")
+        zp_float_bytes = {}
+        # Only the vars from the ZeroPage need to be initialized here,
+        # the vars in all other blocks are just defined and pre-filled there.
+        zpblock = self.module.zeropage()
+        if zpblock:
+            vars_to_init = [v for v in zpblock.scope.filter_nodes(VarDef)
+                            if v.allocate and v.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT)]
+            # @todo optimize sort order (sort on value first, then type, then blockname, then address/name)
+            # (str(self.value) or "", self.blockname, self.name or "", self.address or 0, self.seq_nr)
+            # The byte values currently held in A and X are tracked separately:
+            # a byte init only reloads A, so a single 'previous value' would let a
+            # following word init with the same number store a stale X (high byte).
+            prev_a, prev_x = 0, 0
+            if vars_to_init:
+                self.p("; init zp vars")
+                self.p("\vlda  #0\n\vldx  #0")
+                for variable in vars_to_init:
+                    vname = zpblock.label + '.' + variable.name
+                    vvalue = variable.value
+                    if variable.type == DataType.BYTE:
+                        if vvalue != prev_a:
+                            self.p("\vlda  #${:02x}".format(vvalue))
+                            prev_a = vvalue
+                        self.p("\vsta  {:s}".format(vname))
+                    elif variable.type == DataType.WORD:
+                        lo, hi = vvalue & 0xff, vvalue >> 8
+                        if (lo, hi) != (prev_a, prev_x):
+                            self.p("\vlda  #<${:04x}".format(vvalue))
+                            self.p("\vldx  #>${:04x}".format(vvalue))
+                            prev_a, prev_x = lo, hi
+                        self.p("\vsta  {:s}".format(vname))
+                        self.p("\vstx  {:s}+1".format(vname))
+                    elif variable.type == DataType.FLOAT:
+                        # NOTE(review): to_mflpt5 is not defined in this class as shown here
+                        mflpt = self.to_mflpt5(vvalue)   # type: ignore
+                        zp_float_bytes[variable.name] = (vname, mflpt, vvalue)
+                if zp_float_bytes:
+                    # copy the 5 bytes of every float in one loop, X counts down from 4 to 0
+                    self.p("\vldx  #4")
+                    self.p("-")
+                    for varname, (vname, b, fv) in zp_float_bytes.items():
+                        self.p("\vlda  _float_bytes_{:s},x".format(varname))
+                        self.p("\vsta  {:s},x".format(vname))
+                    self.p("\vdex")
+                    self.p("\vbpl  -")
+                self.p("; end init zp vars")
+            else:
+                self.p("\v; there are no zp vars to initialize")
+        else:
+            self.p("\v; there is no zp block to initialize")
+        if self.module.zp_options == ZpOptions.CLOBBER_RESTORE:
+            self.p("\vjsr  {:s}.start\v; call user code".format(self.module.main().label))
+            self.p("\vcld")
+            self.p("\vjmp  il65_lib_zp.restore_zeropage")
+        else:
+            self.p("\vjmp  {:s}.start\v; call user code".format(self.module.main().label))
+        self.p("")
+        # the float initialization data itself
+        for varname, (vname, mflpt, fpvalue) in zp_float_bytes.items():
+            self.p("_float_bytes_{:s}\v.byte  ${:02x}, ${:02x}, ${:02x}, ${:02x}, ${:02x}\t; {}".format(varname, *mflpt, fpvalue))
+        self.p("\n")
+
+    def blocks(self):
+        """Placeholder: code generation for the blocks is not implemented yet."""
+        self.p("; @todo")   # @todo
+        pass
+
+    def footer(self):
+        """Placeholder: the footer is not implemented yet."""
+        self.p("; @todo")   # @todo
+        pass
+
+
+class Assembler64Tass:
+    """Runs the 64tass cross-assembler on a generated assembly file."""
+
+    def __init__(self, format: ProgramFormat) -> None:
+        self.format = format
+
+    def assemble(self, inputfilename: str, outputfilename: str) -> None:
+        """Assemble inputfilename into outputfilename.
+
+        Also lets the assembler write outputfilename + ".vice-mon-list" (labels)
+        and outputfilename + ".final-asm" (listing).  Raises ValueError for an
+        unknown program format and SystemExit when the assembler cannot be
+        started or returns an error.
+        """
+        args = ["64tass", "--ascii", "--case-sensitive", "-Wall", "-Wno-strict-bool",
+                "--dump-labels", "--vice-labels", "-l", outputfilename+".vice-mon-list",
+                "-L", outputfilename+".final-asm", "--no-monitor", "--output", outputfilename, inputfilename]
+        if self.format in (ProgramFormat.PRG, ProgramFormat.BASIC):
+            args.append("--cbm-prg")
+        elif self.format == ProgramFormat.RAW:
+            args.append("--nostart")
+        else:
+            raise ValueError("don't know how to create format "+str(self.format))
+        try:
+            if self.format == ProgramFormat.PRG:
+                print("\nCreating C-64 prg.")
+            elif self.format == ProgramFormat.RAW:
+                print("\nCreating raw binary.")
+            try:
+                subprocess.check_call(args)
+            except FileNotFoundError as x:
+                raise SystemExit("ERROR: cannot run assembler program: "+str(x))
+        except subprocess.CalledProcessError as x:
+            raise SystemExit("assembler failed with returncode " + str(x.returncode))
+
+    def generate_breakpoint_list(self, program_filename: str) -> str:
+        """Append breakpoint commands to the vice monitor command file and return its name.
+
+        The breakpoint addresses are taken from the lines in
+        program_filename + ".final-asm" that match
+        AssemblyGenerator.BREAKPOINT_COMMENT_DETECTOR.
+        """
+        breakpoints = []
+        # plain "r": the "U" mode flag was deprecated and is rejected by Python 3.11+;
+        # universal newlines are the default for text mode anyway
+        with open(program_filename + ".final-asm", "r") as f:
+            for line in f:
+                match = re.fullmatch(AssemblyGenerator.BREAKPOINT_COMMENT_DETECTOR, line, re.DOTALL)
+                if match:
+                    breakpoints.append("$" + match.group("address"))
+        cmdfile = program_filename + ".vice-mon-list"
+        with open(cmdfile, "at") as f:
+            print("; vice monitor breakpoint list now follows", file=f)
+            print("; {:d} breakpoints have been defined here".format(len(breakpoints)), file=f)
+            print("del", file=f)
+            for b in breakpoints:
+                print("break", b, file=f)
+        return cmdfile
--- a/il65/main.py
+++ b/il65/main.py
@ -9,10 +9,10 @@ import time
 import os
 import argparse
 import subprocess
-from .handwritten.parse import Parser
-from .handwritten.optimize import Optimizer
-from .handwritten.preprocess import PreprocessingParser
-from .handwritten.codegen import CodeGenerator, Assembler64Tass
+from .compile import PlyParser
+from .optimize import optimize
+from .generateasm import AssemblyGenerator, Assembler64Tass
+from .plylex import print_bold


 def main() -> None:
@ -33,29 +33,24 @@ def main() -> None:
    print("\n" + description)

    start = time.perf_counter()
-    pp = PreprocessingParser(args.sourcefile, set())
-    sourcelines, symbols = pp.preprocess()
-    # symbols.print_table()
-
-    p = Parser(args.sourcefile, args.output, set(), sourcelines=sourcelines, ppsymbols=symbols, sub_usage=pp.result.subroutine_usage)
-    parsed = p.parse()
-    if parsed:
+    print("\nParsing program source code.")
+    parser = PlyParser()
+    parsed_module = parser.parse_file(args.sourcefile)
+    if parsed_module:
        if args.nooptimize:
-            p.print_bold("not optimizing the parse tree!")
+            print_bold("not optimizing the parse tree!")
        else:
-            opt = Optimizer(parsed)
-            parsed = opt.optimize()
-        cg = CodeGenerator(parsed)
-        cg.generate()
-        cg.optimize()
-        with open(assembly_filename, "wt") as out:
-            cg.write_assembly(out)
-        assembler = Assembler64Tass(parsed.format)
+            print("\nOptimizing parse tree.")
+            optimize(parsed_module)
+        print("\nGenerating assembly code.")
+        cg = AssemblyGenerator(parsed_module)
+        cg.generate(assembly_filename)
+        assembler = Assembler64Tass(parsed_module.format)
        assembler.assemble(assembly_filename, program_filename)
        mon_command_file = assembler.generate_breakpoint_list(program_filename)
        duration_total = time.perf_counter() - start
        print("Compile duration:  {:.2f} seconds".format(duration_total))
-        p.print_bold("Output file:       " + program_filename)
+        print_bold("Output file:       " + program_filename)
        print()
        if args.startvice:
            print("Autostart vice emulator...")
--- a/il65/main_old.py
+++ b/il65/main_old.py
@ -0,0 +1,65 @@
+"""
+Programming Language for 6502/6510 microprocessors, codename 'Sick'
+This is the main program that drives the rest.
+
+Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
+"""
+
+import time
+import os
+import argparse
+import subprocess
+from .handwritten.parse import Parser
+from .handwritten.optimize import Optimizer
+from .handwritten.preprocess import PreprocessingParser
+from .handwritten.codegen import CodeGenerator, Assembler64Tass
+
+
+def main() -> None:
+    """Command line entry point of the previous (handwritten parser) compiler.
+
+    Parses the command line, preprocesses and parses the source file, optionally
+    optimizes the parse tree, generates and assembles the code, and optionally
+    starts the vice x64 emulator with the resulting program.
+    """
+    description = "Compiler for IL65 language, code name 'Sick'"
+    ap = argparse.ArgumentParser(description=description)
+    ap.add_argument("-o", "--output", help="output directory")
+    ap.add_argument("-no", "--nooptimize", action="store_true", help="do not optimize the parse tree")
+    ap.add_argument("-sv", "--startvice", action="store_true", help="autostart vice x64 emulator after compilation")
+    ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile")
+    args = ap.parse_args()
+    # output files are named after the source file, next to it unless an output directory is given
+    assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm"
+    program_filename = os.path.splitext(args.sourcefile)[0] + ".prg"
+    if args.output:
+        os.makedirs(args.output, mode=0o700, exist_ok=True)
+        assembly_filename = os.path.join(args.output, os.path.split(assembly_filename)[1])
+        program_filename = os.path.join(args.output, os.path.split(program_filename)[1])
+
+    print("\n" + description)
+
+    start = time.perf_counter()
+    pp = PreprocessingParser(args.sourcefile, set())
+    sourcelines, symbols = pp.preprocess()
+    # symbols.print_table()
+
+    p = Parser(args.sourcefile, args.output, set(), sourcelines=sourcelines, ppsymbols=symbols, sub_usage=pp.result.subroutine_usage)
+    parsed = p.parse()
+    # nothing else is done when the parser returns a false value
+    if parsed:
+        if args.nooptimize:
+            p.print_bold("not optimizing the parse tree!")
+        else:
+            opt = Optimizer(parsed)
+            parsed = opt.optimize()
+        cg = CodeGenerator(parsed)
+        cg.generate()
+        cg.optimize()
+        with open(assembly_filename, "wt") as out:
+            cg.write_assembly(out)
+        assembler = Assembler64Tass(parsed.format)
+        assembler.assemble(assembly_filename, program_filename)
+        mon_command_file = assembler.generate_breakpoint_list(program_filename)
+        duration_total = time.perf_counter() - start
+        print("Compile duration:  {:.2f} seconds".format(duration_total))
+        p.print_bold("Output file:       " + program_filename)
+        print()
+        if args.startvice:
+            print("Autostart vice emulator...")
+            cmdline = ["x64", "-remotemonitor", "-moncommands", mon_command_file,
+                       "-autostartprgmode", "1", "-autostart-warp", "-autostart", program_filename]
+            # the emulator's standard output is discarded
+            with open(os.devnull, "wb") as shutup:
+                subprocess.call(cmdline, stdout=shutup)
--- a/il65/optimizer.py
+++ b/il65/optimizer.py
@ -5,9 +5,8 @@ This is the optimizer that applies various optimizations to the parse tree.
 Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

-from typing import no_type_check
-from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression
-from .plylexer import print_warning, print_bold
+from .plyparse import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression
+from .plylex import print_warning, print_bold


 class Optimizer:
@ -27,7 +26,7 @@ class Optimizer:
    def remove_useless_assigns(self):
        # remove assignment statements that do nothing (A=A)
        # and augmented assignments that have no effect (A+=0)
-        # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false
+        # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false  (or perhaps turn them into byte values first?)
        for block, parent in self.module.all_scopes():
            if block.scope:
                for assignment in list(block.scope.nodes):
@ -63,10 +62,10 @@ class Optimizer:
                                continue
                            elif len(assignments) > 1:
                                # replace the first assignment by a multi-assign with all the others
-                                for stmt in assignments[1:]:
-                                    print("{}: joined with previous assignment".format(stmt.sourceref))
-                                    assignments[0].left.extend(stmt.left)
-                                    block.scope.remove_node(stmt)
+                                for assignment in assignments[1:]:
+                                    print("{}: joined with previous assignment".format(assignment.sourceref))
+                                    assignments[0].left.extend(assignment.left)
+                                    block.scope.remove_node(assignment)
                                rvalue = None
                                assignments.clear()
                        else:
@ -165,7 +164,4 @@ def optimize(mod: Module) -> None:
    opt = Optimizer(mod)
    opt.optimize()
    if opt.num_warnings:
-        if opt.num_warnings == 1:
-            print_bold("\nThere is one optimization warning.\n")
-        else:
-            print_bold("\nThere are {:d} optimization warnings.\n".format(opt.num_warnings))
+        print_bold("There are {:d} optimization warnings.".format(opt.num_warnings))
--- a/il65/plylexer.py
+++ b/il65/plylexer.py
--- a/il65/plyparser.py
+++ b/il65/plyparser.py
@ -5,14 +5,36 @@ This is the parser of the IL65 code, that generates a parse tree.
 Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
 """

+import enum
 from collections import defaultdict
-from typing import Union, Generator, Tuple, List
+from typing import Union, Generator, Tuple, List, Optional, Dict
 import attr
 from ply.yacc import yacc
-from .plylexer import SourceRef, tokens, lexer, find_tok_column
+from .plylex import SourceRef, tokens, lexer, find_tok_column
 from .symbols import DataType


+class ProgramFormat(enum.Enum):
+    """The kind of program file that is produced."""
+    RAW = "raw"
+    PRG = "prg"
+    BASIC = "basicprg"
+
+
+class ZpOptions(enum.Enum):
+    """How the program deals with the zeropage."""
+    # NOTE(review): presumably NOCLOBBER leaves the zeropage untouched and CLOBBER
+    # allows overwriting it without restoring - confirm against the code generator
+    NOCLOBBER = "noclobber"
+    CLOBBER = "clobber"
+    CLOBBER_RESTORE = "clobber_restore"
+
+
+class ParseError(Exception):
+    """Error in the parsed source code, with the source location it refers to."""
+
+    def __init__(self, message: str, sourceref: SourceRef) -> None:
+        super().__init__(message)
+        self.sourceref = sourceref
+
+    def __str__(self):
+        # the source location followed by the message
+        return "{} {:s}".format(self.sourceref, self.args[0])
+
+
 start = "start"


@ -65,21 +87,30 @@ class Scope(AstNode):
    symbols = attr.ib(init=False)
    name = attr.ib(init=False)          # will be set by enclosing block, or subroutine etc.
    parent_scope = attr.ib(init=False, default=None)  # will be wired up later
-    save_registers = attr.ib(type=bool, default=False, init=False)    # XXX will be set later
+    save_registers = attr.ib(type=bool, default=None, init=False)    # None = look in parent scope's setting

    def __attrs_post_init__(self):
        # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"]
        self.symbols = {}
        for node in self.nodes:
            assert isinstance(node, AstNode)
+            self._populate_symboltable(node)
+
+    def _populate_symboltable(self, node: AstNode) -> None:
        if isinstance(node, (Label, VarDef)):
+            if node.name in self.symbols:
+                raise ParseError("symbol already defined at {}".format(self.symbols[node.name].sourceref), node.sourceref)
            self.symbols[node.name] = node
        if isinstance(node, Subroutine):
+            if node.name in self.symbols:
+                raise ParseError("symbol already defined at {}".format(self.symbols[node.name].sourceref), node.sourceref)
            self.symbols[node.name] = node
            if node.scope:
                node.scope.parent_scope = self
        if isinstance(node, Block):
            if node.name:
+                if node.name != "ZP" and node.name in self.symbols:
+                    raise ParseError("symbol already defined at {}".format(self.symbols[node.name].sourceref), node.sourceref)
                self.symbols[node.name] = node
                node.scope.parent_scope = self

@ -113,7 +144,10 @@ class Scope(AstNode):

    def remove_node(self, node: AstNode) -> None:
        if hasattr(node, "name"):
-            del self.symbols[node.name]
+            try:
+                del self.symbols[node.name]     # type: ignore
+            except KeyError:
+                pass
        self.nodes.remove(node)

    def replace_node(self, oldnode: AstNode, newnode: AstNode) -> None:
@ -121,7 +155,45 @@ class Scope(AstNode):
        idx = self.nodes.index(oldnode)
        self.nodes[idx] = newnode
        if hasattr(oldnode, "name"):
-            del self.symbols[oldnode.name]
+            del self.symbols[oldnode.name]  # type: ignore
+
+    def add_node(self, newnode: AstNode, index: int=None) -> None:
+        # add newnode to this scope's node list: appended at the end when index is None,
+        # otherwise inserted at position index; afterwards it is registered in the symbol table.
+        # NOTE(review): if _populate_symboltable raises (name already defined) the node has
+        # already been added to self.nodes at that point - confirm callers never continue after that.
+        assert isinstance(newnode, AstNode)
+        if index is None:
+            self.nodes.append(newnode)
+        else:
+            self.nodes.insert(index, newnode)
+        self._populate_symboltable(newnode)
+
+
+def validate_address(object: AstNode, attrib: attr.Attribute, value: Optional[int]):
+    # validator for the 'address' attributes (passed as validator= to attr.ib below).
+    # value None (no address given) is always accepted.
+    # raises ParseError when a Block named "ZP" is given any address at all,
+    # or when the address lies outside $0200..$ffff.
+    if value is None:
+        return
+    if isinstance(object, Block) and object.name == "ZP":
+        raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), object.sourceref)
+    if value < 0x0200 or value > 0xffff:
+        raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), object.sourceref)
+
+
+@attr.s(cmp=False, repr=False)
+class Block(AstNode):
+    # AST node for a block: wraps a Scope and has an optional name and an optional (validated) address.
+    scope = attr.ib(type=Scope)
+    name = attr.ib(type=str, default=None)
+    address = attr.ib(type=int, default=None, validator=validate_address)
+    # plain class attribute (not an attr.ib), so this dict is shared by all Block instances:
+    # it remembers the label that was generated for each block that has no name
+    _unnamed_block_labels = {}  # type: Dict[Block, str]
+
+    def __attrs_post_init__(self):
+        # the wrapped scope gets the same name as the block (this may be None)
+        self.scope.name = self.name
+
+    @property
+    def label(self) -> str:
+        # the block's own name if it has one; otherwise a generated "il65_block_<n>" label,
+        # created on first access and returned unchanged for this block from then on
+        if self.name:
+            return self.name
+        if self in self._unnamed_block_labels:
+            return self._unnamed_block_labels[self]
+        # <n> is the number of unnamed blocks that were given a label before this one
+        label = "il65_block_{:d}".format(len(self._unnamed_block_labels))
+        self._unnamed_block_labels[self] = label
+        return label


@attr.s(cmp=False, repr=False)
@ -129,6 +201,9 @@ class Module(AstNode):
    name = attr.ib(type=str)     # filename
    scope = attr.ib(type=Scope)
    subroutine_usage = attr.ib(type=defaultdict, init=False, default=attr.Factory(lambda: defaultdict(set)))    # will be populated later
+    format = attr.ib(type=ProgramFormat, init=False, default=ProgramFormat.PRG)     # can be set via directive
+    address = attr.ib(type=int, init=False, default=0xc000, validator=validate_address)     # can be set via directive
+    zp_options = attr.ib(type=ZpOptions, init=False, default=ZpOptions.NOCLOBBER)    # can be set via directive

    def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]:
        # generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
@ -139,15 +214,19 @@ class Module(AstNode):
            for subroutine in list(block.scope.filter_nodes(Subroutine)):
                yield subroutine, block

+    def zeropage(self) -> Optional[Block]:
+        # return the zeropage block: the module's first block, if that one is named "ZP"; None otherwise
+        # (next() is given a None default so a module without any blocks yields None instead of raising StopIteration)
+        first_block = next(self.scope.filter_nodes(Block), None)
+        if first_block is not None and first_block.name == "ZP":
+            return first_block
+        return None

-@attr.s(cmp=False, repr=False)
-class Block(AstNode):
-    scope = attr.ib(type=Scope)
-    name = attr.ib(type=str, default=None)
-    address = attr.ib(type=int, default=None)
-
-    def __attrs_post_init__(self):
-        self.scope.name = self.name
+    def main(self) -> Optional[Block]:
+        # return the first block named 'main', or None when the module defines no such block
+        for block in self.scope.filter_nodes(Block):
+            if block.name == "main":
+                return block
+        return None


@attr.s(cmp=False, repr=False)
@ -283,7 +362,7 @@ class Subroutine(AstNode):
    param_spec = attr.ib()
    result_spec = attr.ib()
    scope = attr.ib(type=Scope, default=None)
-    address = attr.ib(type=int, default=None)
+    address = attr.ib(type=int, default=None, validator=validate_address)

    def __attrs_post_init__(self):
        if self.scope and self.address is not None:
@ -392,7 +471,7 @@ def p_directive(p):
    directive :  DIRECTIVE  ENDL
              |  DIRECTIVE  directive_args  ENDL
    """
-    if len(p) == 2:
+    if len(p) == 3:
        p[0] = Directive(name=p[1], sourceref=_token_sref(p, 1))
    else:
        p[0] = Directive(name=p[1], args=p[2], sourceref=_token_sref(p, 1))
@ -423,14 +502,14 @@ def p_block_name_addr(p):
    """
    block :  BITINVERT  NAME  INTEGER  endl_opt  scope
    """
-    p[0] = Block(name=p[2], address=p[3], scope=p[5], sourceref=_token_sref(p, 1))
+    p[0] = Block(name=p[2], address=p[3], scope=p[5], sourceref=_token_sref(p, 2))


 def p_block_name(p):
    """
    block :  BITINVERT  NAME  endl_opt  scope
    """
-    p[0] = Block(name=p[2], scope=p[4], sourceref=_token_sref(p, 1))
+    p[0] = Block(name=p[2], scope=p[4], sourceref=_token_sref(p, 2))


 def p_block(p):
@ -511,14 +590,14 @@ def p_vardef(p):
    """
    vardef :  VARTYPE  type_opt  NAME  ENDL
    """
-    p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], sourceref=_token_sref(p, 1))
+    p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], sourceref=_token_sref(p, 3))


 def p_vardef_value(p):
    """
    vardef :  VARTYPE  type_opt  NAME  IS  expression
    """
-    p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], value=p[5], sourceref=_token_sref(p, 1))
+    p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], value=p[5], sourceref=_token_sref(p, 3))


 def p_type_opt(p):
--- a/il65/symbols.py
+++ b/il65/symbols.py
@ -24,3 +24,15 @@ class DataType(enum.Enum):


 STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS}
+
+
+def to_hex(number: int) -> str:
+    # format number as lowercase hex with a '$' prefix:
+    # 0..255 -> "$00".."$ff"
+    # 256..65535 -> "$0100".."$ffff"
+    # None raises ValueError; any value outside 0..65535 raises OverflowError
+    if number is None:
+        raise ValueError("number")
+    if 0 <= number < 0x100:
+        return "${:02x}".format(number)
+    if 0 <= number < 0x10000:
+        return "${:04x}".format(number)
+    raise OverflowError(number)
--- a/tests/test_compiler.py
+++ b/tests/test_compiler.py
@ -1,4 +1,4 @@
-from il65.compiler import PlyParser
+from il65.compile import PlyParser


 def test_compiler():
--- a/tests/test_core.py
+++ b/tests/test_core.py
@ -1,6 +1,7 @@
-from il65.symbols import DataType, STRING_DATATYPES
-from il65.compiler import ParseError
-from il65.plylexer import SourceRef
+import pytest
+from il65.symbols import DataType, STRING_DATATYPES, to_hex
+from il65.compile import ParseError
+from il65.plylex import SourceRef


 def test_datatypes():
@ -15,6 +16,19 @@ def test_sourceref():


 def test_parseerror():
-    p = ParseError("message", "source code", SourceRef("filename", 99, 42))
+    p = ParseError("message", SourceRef("filename", 99, 42))
    assert p.args == ("message", )
    assert str(p) == "filename:99:42 message"
+
+
+def test_to_hex():
+    # values below 256 format with two hex digits, 256..65535 with four; outside 0..65535 must raise OverflowError
+    assert to_hex(0) == "$00"
+    assert to_hex(1) == "$01"
+    assert to_hex(255) == "$ff"
+    assert to_hex(256) == "$0100"
+    assert to_hex(20060) == "$4e5c"
+    assert to_hex(65535) == "$ffff"
+    with pytest.raises(OverflowError):
+        to_hex(-1)
+    with pytest.raises(OverflowError):
+        to_hex(65536)
--- a/tests/test_optimizer.py
+++ b/tests/test_optimizer.py
@ -1,4 +1,4 @@
-from il65.optimizer import Optimizer
+from il65.optimize import Optimizer


 def test_optimizer():
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@ -1,5 +1,5 @@
-from il65.plylexer import lexer, tokens, find_tok_column, literals, reserved
-from il65.plyparser import parser, TokenFilter, Module, Subroutine, Block, Return
+from il65.plylex import lexer, tokens, find_tok_column, literals, reserved
+from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return


 def test_lexer_definitions():
--- a/testsource/dtypes.ill
+++ b/testsource/dtypes.ill
@ -13,7 +13,7 @@
 	; you can NOT put subroutines in here (yet).
 }

-~ ZP $0004 {
+~ ZP {
 	var    zpvar1
 	var    zpvar2
 	memory zpmem1   = $f0
--- a/todo.ill
+++ b/todo.ill
@ -1,10 +1,14 @@
-%output prg,basic
+%output prg
+%saveregisters
 %import c64lib
 %import mathlib

+%address 22222
+
+
 ~ main $4444 {

-	%saveregisters true, false
+	%saveregisters true


                const   num = 2
@ -186,7 +190,8 @@ loop   :

 sub sub1 () -> () {

-	%saveregisters off
+	%saveregisters no
+	%breakpoint
 	%breakpoint
 	%breakpoint

@ -197,7 +202,7 @@ label:

 sub emptysub () -> () {

-	%saveregisters on
+	%saveregisters yes

 }