diff --git a/il65/compiler.py b/il65/compile.py similarity index 66% rename from il65/compiler.py rename to il65/compile.py index 50e84d110..63d26d31a 100644 --- a/il65/compiler.py +++ b/il65/compile.py @@ -10,20 +10,11 @@ import os import sys import linecache from typing import Optional, Tuple, Set, Dict, Any, no_type_check -from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \ - SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression -from .plylexer import SourceRef, print_bold -from .optimizer import optimize - - -class ParseError(Exception): - def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None: - super().__init__(message) - self.sourceref = sourceref - self.sourcetext = sourcetext - - def __str__(self): - return "{} {:s}".format(self.sourceref, self.args[0]) +import attr +from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, \ + SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions +from .plylex import SourceRef, print_bold +from .optimize import optimize class PlyParser: @@ -33,14 +24,18 @@ class PlyParser: def parse_file(self, filename: str) -> Module: print("parsing:", filename) - module = parse_file(filename, self.lexer_error) + module = None try: + module = parse_file(filename, self.lexer_error) self.check_directives(module) self.process_imports(module) self.create_multiassigns(module) self.process_all_expressions(module) if not self.parsing_import: + # these shall only be done on the main module after all imports have been done: + self.apply_directive_options(module) self.determine_subroutine_usage(module) + self.check_and_merge_zeropages(module) except ParseError as x: self.handle_parse_error(x) if self.parse_errors: @@ -52,6 +47,27 @@ class PlyParser: self.parse_errors += 1 print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args))) + def check_and_merge_zeropages(self, 
def check_and_merge_zeropages(self, module: Module) -> None:
    """Merge all blocks named "ZP" into one and make it the module's first block.

    The first ZP block encountered is kept.  The nodes of every later ZP block
    are moved into it: directives are inserted at the front of its scope,
    variable definitions are appended at the end.

    Raises:
        ParseError: if a later ZP block contains a node that is neither a
            Directive nor a VarDef.
    """
    # NOTE(review): the nesting below was reconstructed from a source whose
    # indentation was lost; confirm the placement of remove_node().
    zeropage = None
    # Iterate over a snapshot because blocks are removed from the scope in the loop.
    for block in list(module.scope.filter_nodes(Block)):
        if block.name != "ZP":
            continue
        if zeropage:
            # merge other ZP block into first ZP block
            for node in block.scope.nodes:
                if isinstance(node, Directive):
                    zeropage.scope.add_node(node, 0)
                elif isinstance(node, VarDef):
                    zeropage.scope.add_node(node)
                else:
                    raise ParseError("only variables and directives allowed in zeropage block", node.sourceref)
        else:
            zeropage = block
        # every ZP block is taken out; the merged one is re-inserted below
        module.scope.remove_node(block)
    if zeropage:
        # add the zero page again, as the very first block
        module.scope.add_node(zeropage, 0)


def apply_directive_options(self, module: Module) -> None:
    """Apply the settings expressed by directives to the module, blocks and subroutines.

    Module level: ``output`` sets format and default load address, ``address``
    overrides the load address, ``zp`` selects the zero-page option,
    ``saveregisters`` sets the flag on the module scope.  ``import`` is
    skipped because it has been processed earlier.
    Block / subroutine level: only ``saveregisters`` has an effect here;
    ``breakpoint``, ``asmbinary`` and ``asminclude`` are skipped.

    Raises:
        ParseError: for invalid directive arguments.
        NotImplementedError: for a directive name that is not handled here.
    """
    def set_save_registers(scope: Scope, save_dir: Directive) -> None:
        # A bare directive (no argument) means "yes".
        if not scope:
            return
        if len(save_dir.args) > 1:
            raise ParseError("need zero or one directive argument", save_dir.sourceref)
        if save_dir.args:
            if save_dir.args[0] in ("yes", "true"):
                scope.save_registers = True
            elif save_dir.args[0] in ("no", "false"):
                scope.save_registers = False
            else:
                raise ParseError("invalid directive args", save_dir.sourceref)
        else:
            scope.save_registers = True

    def apply_scope_directives(scope: Scope) -> None:
        # Shared handling for the directives of a block or a subroutine.
        for directive in scope.filter_nodes(Directive):
            if directive.name == "saveregisters":
                set_save_registers(scope, directive)
            elif directive.name in ("breakpoint", "asmbinary", "asminclude"):
                continue
            else:
                raise NotImplementedError(directive.name)

    for block, parent in module.all_scopes():
        if isinstance(block, Module):
            # process the module's directives
            for directive in block.scope.filter_nodes(Directive):
                if directive.name == "output":
                    if len(directive.args) != 1 or not isinstance(directive.args[0], str):
                        raise ParseError("need one str directive argument", directive.sourceref)
                    if directive.args[0] == "raw":
                        block.format = ProgramFormat.RAW
                        block.address = 0xc000
                    elif directive.args[0] == "prg":
                        block.format = ProgramFormat.PRG
                        block.address = 0x0801
                    elif directive.args[0] == "basic":
                        block.format = ProgramFormat.BASIC
                        block.address = 0x0801
                    else:
                        raise ParseError("invalid directive args", directive.sourceref)
                elif directive.name == "address":
                    if len(directive.args) != 1 or not isinstance(directive.args[0], int):
                        raise ParseError("need one integer directive argument", directive.sourceref)
                    if block.format == ProgramFormat.BASIC:
                        raise ParseError("basic cannot have a custom load address", directive.sourceref)
                    block.address = directive.args[0]
                    # re-run the attrs validators now that the address was changed
                    attr.validate(block)
                elif directive.name == "import":
                    # Was `in "import"`, a substring test that also matched
                    # names such as "" or "port".
                    pass  # is processed earlier
                elif directive.name == "zp":
                    if len(directive.args) not in (1, 2) or set(directive.args) - {"clobber", "restore"}:
                        raise ParseError("invalid directive args", directive.sourceref)
                    if "clobber" in directive.args and "restore" in directive.args:
                        module.zp_options = ZpOptions.CLOBBER_RESTORE
                    elif "clobber" in directive.args:
                        module.zp_options = ZpOptions.CLOBBER
                    elif "restore" in directive.args:
                        # "restore" on its own is rejected
                        raise ParseError("invalid directive args", directive.sourceref)
                elif directive.name == "saveregisters":
                    set_save_registers(block.scope, directive)
                else:
                    raise NotImplementedError(directive.name)
        elif isinstance(block, Block):
            # process the block's directives
            apply_scope_directives(block.scope)
        elif isinstance(block, Subroutine):
            if block.scope:
                # process the sub's directives
                apply_scope_directives(block.scope)
determine_subroutine_usage(self, module: Module) -> None: module.subroutine_usage.clear() @@ -177,10 +269,10 @@ class PlyParser: imports = set() # type: Set[str] for directive in node.scope.filter_nodes(Directive): if directive.name not in {"output", "zp", "address", "import", "saveregisters"}: - raise ParseError("invalid directive in module", None, directive.sourceref) + raise ParseError("invalid directive in module", directive.sourceref) if directive.name == "import": if imports & set(directive.args): - raise ParseError("duplicate import", None, directive.sourceref) + raise ParseError("duplicate import", directive.sourceref) imports |= set(directive.args) if isinstance(node, (Block, Subroutine)): # check block and subroutine-level directives @@ -190,9 +282,9 @@ class PlyParser: for sub_node in node.scope.nodes: if isinstance(sub_node, Directive): if sub_node.name not in {"asmbinary", "asminclude", "breakpoint", "saveregisters"}: - raise ParseError("invalid directive in " + node.__class__.__name__.lower(), None, sub_node.sourceref) + raise ParseError("invalid directive in " + node.__class__.__name__.lower(), sub_node.sourceref) if sub_node.name == "saveregisters" and not first_node: - raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref) + raise ParseError("saveregisters directive should be the first", sub_node.sourceref) first_node = False def process_imports(self, module: Module) -> None: @@ -201,11 +293,11 @@ class PlyParser: for directive in module.scope.filter_nodes(Directive): if directive.name == "import": if len(directive.args) < 1: - raise ParseError("missing argument(s) for import directive", None, directive.sourceref) + raise ParseError("missing argument(s) for import directive", directive.sourceref) for arg in directive.args: filename = self.find_import_file(arg, directive.sourceref.file) if not filename: - raise ParseError("imported file not found", None, directive.sourceref) + raise ParseError("imported file not 
class CodeError(Exception):
    """Raised when the parse tree cannot be turned into assembly code."""


class AssemblyGenerator:
    """Generates 64tass assembly source text from a parsed module.

    Output is collected in an in-memory buffer by the p() method and written
    to disk by generate().
    """

    BREAKPOINT_COMMENT_SIGNATURE = "~~~BREAKPOINT~~~"
    # Matched against lines of the assembler's listing file; the named group
    # 'address' is read back in Assembler64Tass.generate_breakpoint_list().
    BREAKPOINT_COMMENT_DETECTOR = r".(?P<address>\w+)\s+ea\s+nop\s+;\s+{:s}.*".format(BREAKPOINT_COMMENT_SIGNATURE)

    def __init__(self, module: Module) -> None:
        self.module = module
        self.generated_code = io.StringIO()

    def p(self, text, *args, **vargs):
        """Print a line of output into the code buffer.

        A vertical-tab character in *text* is replaced by the line indent
        (two tabs).  Extra positional and keyword arguments go to print().
        """
        print(text.replace("\v", "\t\t"), *args, file=self.generated_code, **vargs)

    def generate(self, filename: str) -> None:
        """Generate the assembly code and write it to *filename*."""
        self._generate()
        with open(filename, "wt") as out:
            out.write(self.generated_code.getvalue())
        self.generated_code.close()

    def _generate(self) -> None:
        self.sanitycheck()
        self.header()
        self.initialize_variables()
        self.blocks()
        self.footer()

    def sanitycheck(self):
        """Check the module for problems that would produce invalid assembly.

        Raises:
            CodeError: on duplicate block names, or when the zero-page block
                contains something other than directives and variables.
        """
        # duplicate block names?
        # NOTE(review): unnamed blocks all have name None and are therefore
        # counted as duplicates of each other - confirm that this is intended.
        all_blocknames = [b.name for b in self.module.scope.filter_nodes(Block)]
        unique_blocknames = set(all_blocknames)
        if len(all_blocknames) != len(unique_blocknames):
            # remove one occurrence of every name; what remains are the duplicates
            for name in unique_blocknames:
                all_blocknames.remove(name)
            raise CodeError("there are duplicate block names", all_blocknames)
        zpblock = self.module.zeropage()
        if zpblock:
            # ZP block contains no code?
            for stmt in zpblock.scope.nodes:
                if not isinstance(stmt, (Directive, VarDef)):
                    raise CodeError("ZP block can only contain directive and var")

    def header(self):
        """Emit the file header comments and the program start for the output format.

        Raises:
            CodeError: if BASIC output is requested with a load address other
                than $0801.
        """
        self.p("; code generated by il65.py - codename 'Sick'")
        self.p("; source file:", self.module.sourceref.file)
        self.p("; compiled on:", datetime.datetime.now())
        self.p("; output options:", self.module.format, self.module.zp_options)
        self.p("; assembler syntax is for the 64tasm cross-assembler")
        self.p("\n.cpu  '6502'\n.enc  'none'\n")
        assert self.module.address is not None
        if self.module.format in (ProgramFormat.PRG, ProgramFormat.BASIC):
            if self.module.format == ProgramFormat.BASIC:
                if self.module.address != 0x0801:
                    raise CodeError("BASIC output mode must have load address $0801")
                self.p("; ---- basic program with sys call ----")
                self.p("* = " + to_hex(self.module.address))
                year = datetime.datetime.now().year
                self.p("\v.word (+), {:d}".format(year))
                self.p("\v.null $9e, format(' %d ', _il65_sysaddr), $3a, $8f, ' il65 by idj'")
                self.p("+\v.word 0")
                self.p("_il65_sysaddr\v; assembly code starts here\n")
            else:
                self.p("; ---- program without sys call ----")
                self.p("* = " + to_hex(self.module.address) + "\n")
        elif self.module.format == ProgramFormat.RAW:
            self.p("; ---- raw assembler program ----")
            self.p("* = " + to_hex(self.module.address) + "\n")

    def initialize_variables(self) -> None:
        """Emit the code that initializes the zero-page variables and starts the program.

        Raises:
            CodeError: if the module has no 'main' block.
        """
        if self.module.zp_options == ZpOptions.CLOBBER_RESTORE:
            self.p("\vjsr il65_lib_zp.save_zeropage")
        zp_float_bytes = {}
        # Only the vars from the ZeroPage need to be initialized here,
        # the vars in all other blocks are just defined and pre-filled there.
        zpblock = self.module.zeropage()
        if zpblock:
            vars_to_init = [v for v in zpblock.scope.filter_nodes(VarDef)
                            if v.allocate and v.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT)]
            # @todo optimize sort order (sort on value first, then type, then blockname, then address/name)
            # (str(self.value) or "", self.blockname, self.name or "", self.address or 0, self.seq_nr)
            if vars_to_init:
                self.p("; init zp vars")
                self.p("\vlda #0\n\vldx #0")
                # Track A and X separately.  One shared "previous value" let a
                # word reuse a stale X left behind by an earlier word.
                reg_a = 0
                reg_x = 0
                for variable in vars_to_init:
                    vname = zpblock.label + '.' + variable.name
                    vvalue = variable.value
                    if variable.type == DataType.BYTE:
                        if vvalue != reg_a:
                            self.p("\vlda #${:02x}".format(vvalue))
                            reg_a = vvalue
                        self.p("\vsta {:s}".format(vname))
                    elif variable.type == DataType.WORD:
                        low = vvalue & 0xff
                        high = (vvalue >> 8) & 0xff
                        if low != reg_a:
                            self.p("\vlda #<${:04x}".format(vvalue))
                            reg_a = low
                        if high != reg_x:
                            self.p("\vldx #>${:04x}".format(vvalue))
                            reg_x = high
                        self.p("\vsta {:s}".format(vname))
                        self.p("\vstx {:s}+1".format(vname))
                    elif variable.type == DataType.FLOAT:
                        # NOTE(review): to_mflpt5 is not defined in this class as
                        # far as visible here - confirm where it comes from.
                        float_bytes = self.to_mflpt5(vvalue)  # type: ignore
                        zp_float_bytes[variable.name] = (vname, float_bytes, vvalue)
                if zp_float_bytes:
                    # one loop, counting X down from 4, copies the bytes of all floats
                    self.p("\vldx #4")
                    self.p("-")
                    for varname, (vname, _, _) in zp_float_bytes.items():
                        self.p("\vlda _float_bytes_{:s},x".format(varname))
                        self.p("\vsta {:s},x".format(vname))
                    self.p("\vdex")
                    self.p("\vbpl -")
                self.p("; end init zp vars")
            else:
                self.p("\v; there are no zp vars to initialize")
        else:
            self.p("\v; there is no zp block to initialize")
        main_block = self.module.main()
        if main_block is None:
            # previously this surfaced as an AttributeError on None
            raise CodeError("module has no 'main' block")
        if self.module.zp_options == ZpOptions.CLOBBER_RESTORE:
            self.p("\vjsr {:s}.start\v; call user code".format(main_block.label))
            self.p("\vcld")
            self.p("\vjmp il65_lib_zp.restore_zeropage")
        else:
            self.p("\vjmp {:s}.start\v; call user code".format(main_block.label))
        self.p("")
        for varname, (vname, float_bytes, fpvalue) in zp_float_bytes.items():
            self.p("_float_bytes_{:s}\v.byte  ${:02x}, ${:02x}, ${:02x}, ${:02x}, ${:02x}\t; {}".format(varname, *float_bytes, fpvalue))
        self.p("\n")

    def blocks(self):
        self.p("; @todo")   # @todo

    def footer(self):
        self.p("; @todo")   # @todo


class Assembler64Tass:
    """Runs the 64tass cross-assembler on generated assembly source."""

    def __init__(self, format: ProgramFormat) -> None:
        self.format = format

    def assemble(self, inputfilename: str, outputfilename: str) -> None:
        """Assemble *inputfilename* into *outputfilename*.

        A label list (outputfilename + ".vice-mon-list") and a listing
        (outputfilename + ".final-asm") are requested from the assembler too.

        Raises:
            ValueError: if the program format is not supported.
            SystemExit: if the assembler cannot be started or reports failure.
        """
        args = ["64tass", "--ascii", "--case-sensitive", "-Wall", "-Wno-strict-bool",
                "--dump-labels", "--vice-labels", "-l", outputfilename+".vice-mon-list",
                "-L", outputfilename+".final-asm", "--no-monitor", "--output", outputfilename, inputfilename]
        if self.format in (ProgramFormat.PRG, ProgramFormat.BASIC):
            args.append("--cbm-prg")
        elif self.format == ProgramFormat.RAW:
            args.append("--nostart")
        else:
            raise ValueError("don't know how to create format "+str(self.format))
        if self.format == ProgramFormat.PRG:
            print("\nCreating C-64 prg.")
        elif self.format == ProgramFormat.RAW:
            print("\nCreating raw binary.")
        try:
            subprocess.check_call(args)
        except FileNotFoundError as x:
            raise SystemExit("ERROR: cannot run assembler program: "+str(x))
        except subprocess.CalledProcessError as x:
            raise SystemExit("assembler failed with returncode " + str(x.returncode))

    def generate_breakpoint_list(self, program_filename: str) -> str:
        """Append breakpoint commands to the vice monitor command file.

        Scans program_filename + ".final-asm" for lines matching
        AssemblyGenerator.BREAKPOINT_COMMENT_DETECTOR and appends a "break"
        command per match to program_filename + ".vice-mon-list".

        Returns:
            The name of the monitor command file.
        """
        detector = re.compile(AssemblyGenerator.BREAKPOINT_COMMENT_DETECTOR, re.DOTALL)
        breakpoints = []
        # Plain "r" already gives universal newlines; "rU" fails on Python 3.11+.
        with open(program_filename + ".final-asm", "r") as f:
            for line in f:
                match = detector.fullmatch(line)
                if match:
                    breakpoints.append("$" + match.group("address"))
        cmdfile = program_filename + ".vice-mon-list"
        with open(cmdfile, "at") as f:
            print("; vice monitor breakpoint list now follows", file=f)
            print("; {:d} breakpoints have been defined here".format(len(breakpoints)), file=f)
            print("del", file=f)
            for b in breakpoints:
                print("break", b, file=f)
        return cmdfile
b/il65/main.py @@ -9,10 +9,10 @@ import time import os import argparse import subprocess -from .handwritten.parse import Parser -from .handwritten.optimize import Optimizer -from .handwritten.preprocess import PreprocessingParser -from .handwritten.codegen import CodeGenerator, Assembler64Tass +from .compile import PlyParser +from .optimize import optimize +from .generateasm import AssemblyGenerator, Assembler64Tass +from .plylex import print_bold def main() -> None: @@ -33,29 +33,24 @@ def main() -> None: print("\n" + description) start = time.perf_counter() - pp = PreprocessingParser(args.sourcefile, set()) - sourcelines, symbols = pp.preprocess() - # symbols.print_table() - - p = Parser(args.sourcefile, args.output, set(), sourcelines=sourcelines, ppsymbols=symbols, sub_usage=pp.result.subroutine_usage) - parsed = p.parse() - if parsed: + print("\nParsing program source code.") + parser = PlyParser() + parsed_module = parser.parse_file(args.sourcefile) + if parsed_module: if args.nooptimize: - p.print_bold("not optimizing the parse tree!") + print_bold("not optimizing the parse tree!") else: - opt = Optimizer(parsed) - parsed = opt.optimize() - cg = CodeGenerator(parsed) - cg.generate() - cg.optimize() - with open(assembly_filename, "wt") as out: - cg.write_assembly(out) - assembler = Assembler64Tass(parsed.format) + print("\nOptimizing parse tree.") + optimize(parsed_module) + print("\nGenerating assembly code.") + cg = AssemblyGenerator(parsed_module) + cg.generate(assembly_filename) + assembler = Assembler64Tass(parsed_module.format) assembler.assemble(assembly_filename, program_filename) mon_command_file = assembler.generate_breakpoint_list(program_filename) duration_total = time.perf_counter() - start print("Compile duration: {:.2f} seconds".format(duration_total)) - p.print_bold("Output file: " + program_filename) + print_bold("Output file: " + program_filename) print() if args.startvice: print("Autostart vice emulator...") diff --git a/il65/main_old.py 
def main() -> None:
    """Command-line driver using the handwritten parser and code generator.

    Parses the command line, preprocesses and parses the source file,
    optionally optimizes, writes the assembly file, runs the assembler and,
    when requested, starts the vice emulator on the result.
    """
    description = "Compiler for IL65 language, code name 'Sick'"
    argparser = argparse.ArgumentParser(description=description)
    argparser.add_argument("-o", "--output", help="output directory")
    argparser.add_argument("-no", "--nooptimize", action="store_true", help="do not optimize the parse tree")
    argparser.add_argument("-sv", "--startvice", action="store_true", help="autostart vice x64 emulator after compilation")
    argparser.add_argument("sourcefile", help="the source .ill/.il65 file to compile")
    args = argparser.parse_args()

    # Output files are named after the source file, optionally in another directory.
    source_base = os.path.splitext(args.sourcefile)[0]
    assembly_filename = source_base + ".asm"
    program_filename = source_base + ".prg"
    if args.output:
        os.makedirs(args.output, mode=0o700, exist_ok=True)
        assembly_filename = os.path.join(args.output, os.path.basename(assembly_filename))
        program_filename = os.path.join(args.output, os.path.basename(program_filename))

    print("\n" + description)

    start = time.perf_counter()
    preprocessor = PreprocessingParser(args.sourcefile, set())
    sourcelines, symbols = preprocessor.preprocess()
    # symbols.print_table()

    parser = Parser(args.sourcefile, args.output, set(), sourcelines=sourcelines, ppsymbols=symbols,
                    sub_usage=preprocessor.result.subroutine_usage)
    parsed = parser.parse()
    if not parsed:
        # NOTE(review): assumes the remaining steps were all nested under
        # `if parsed:` in the original layout - confirm.
        return

    if args.nooptimize:
        parser.print_bold("not optimizing the parse tree!")
    else:
        parsed = Optimizer(parsed).optimize()
    codegen = CodeGenerator(parsed)
    codegen.generate()
    codegen.optimize()
    with open(assembly_filename, "wt") as out:
        codegen.write_assembly(out)
    assembler = Assembler64Tass(parsed.format)
    assembler.assemble(assembly_filename, program_filename)
    mon_command_file = assembler.generate_breakpoint_list(program_filename)
    duration_total = time.perf_counter() - start
    print("Compile duration: {:.2f} seconds".format(duration_total))
    parser.print_bold("Output file: " + program_filename)
    print()
    if args.startvice:
        print("Autostart vice emulator...")
        cmdline = ["x64", "-remotemonitor", "-moncommands", mon_command_file,
                   "-autostartprgmode", "1", "-autostart-warp", "-autostart", program_filename]
        # the emulator's standard output is discarded
        with open(os.devnull, "wb") as shutup:
            subprocess.call(cmdline, stdout=shutup)
for block, parent in self.module.all_scopes(): if block.scope: for assignment in list(block.scope.nodes): @@ -63,10 +62,10 @@ class Optimizer: continue elif len(assignments) > 1: # replace the first assignment by a multi-assign with all the others - for stmt in assignments[1:]: - print("{}: joined with previous assignment".format(stmt.sourceref)) - assignments[0].left.extend(stmt.left) - block.scope.remove_node(stmt) + for assignment in assignments[1:]: + print("{}: joined with previous assignment".format(assignment.sourceref)) + assignments[0].left.extend(assignment.left) + block.scope.remove_node(assignment) rvalue = None assignments.clear() else: @@ -165,7 +164,4 @@ def optimize(mod: Module) -> None: opt = Optimizer(mod) opt.optimize() if opt.num_warnings: - if opt.num_warnings == 1: - print_bold("\nThere is one optimization warning.\n") - else: - print_bold("\nThere are {:d} optimization warnings.\n".format(opt.num_warnings)) + print_bold("There are {:d} optimization warnings.".format(opt.num_warnings)) diff --git a/il65/plylexer.py b/il65/plylex.py similarity index 100% rename from il65/plylexer.py rename to il65/plylex.py diff --git a/il65/plyparser.py b/il65/plyparse.py similarity index 86% rename from il65/plyparser.py rename to il65/plyparse.py index 5b4775263..435bb51b7 100644 --- a/il65/plyparser.py +++ b/il65/plyparse.py @@ -5,14 +5,36 @@ This is the parser of the IL65 code, that generates a parse tree. 
class ProgramFormat(enum.Enum):
    """The kinds of program output."""
    RAW = "raw"
    PRG = "prg"
    BASIC = "basicprg"


class ZpOptions(enum.Enum):
    """The zero-page handling options."""
    NOCLOBBER = "noclobber"
    CLOBBER = "clobber"
    CLOBBER_RESTORE = "clobber_restore"


class ParseError(Exception):
    """An error in the source code, tied to a source location.

    The message is the exception's only argument; the location is kept in
    the `sourceref` attribute and prefixed to the message by str().
    """

    def __init__(self, message: str, sourceref: SourceRef) -> None:
        Exception.__init__(self, message)
        self.sourceref = sourceref

    def __str__(self):
        message = self.args[0]
        return "{} {:s}".format(self.sourceref, message)
def __attrs_post_init__(self):
    """Build the symbol table from the nodes this scope was created with."""
    # the table allows fast lookups via scope["name"] or scope["dotted.name"]
    self.symbols = {}
    for child in self.nodes:
        assert isinstance(child, AstNode)
        self._populate_symboltable(child)


def _populate_symboltable(self, node: AstNode) -> None:
    """Register *node* in this scope's symbol table if it defines a name.

    Labels, variable definitions, subroutines and named blocks are entered
    under their name.  The scope of a subroutine (when it has one) and of a
    block gets this scope as its parent scope.

    Raises:
        ParseError: if the name is already present; a block named "ZP" is
            exempt and replaces the existing entry.
    """
    def register(may_redefine: bool = False) -> None:
        if not may_redefine and node.name in self.symbols:
            raise ParseError("symbol already defined at {}".format(self.symbols[node.name].sourceref), node.sourceref)
        self.symbols[node.name] = node

    if isinstance(node, (Label, VarDef)):
        register()
    if isinstance(node, Subroutine):
        register()
        if node.scope:
            node.scope.parent_scope = self
    if isinstance(node, Block):
        if node.name:
            register(may_redefine=node.name == "ZP")
        # NOTE(review): assumed to apply to unnamed blocks as well (nesting
        # reconstructed from a source without indentation) - confirm.
        node.scope.parent_scope = self
def remove_node(self, node: AstNode) -> None:
    """(Scope method) Remove *node* from the node list and from the symbol table.

    A node whose name is not in the symbol table is not an error.
    """
    if hasattr(node, "name"):
        self.symbols.pop(node.name, None)    # type: ignore
    self.nodes.remove(node)


def add_node(self, newnode: AstNode, index: int=None) -> None:
    """(Scope method) Add *newnode* at position *index*, or at the end when index is None.

    The node is registered in the symbol table as well.
    """
    assert isinstance(newnode, AstNode)
    position = len(self.nodes) if index is None else index
    self.nodes.insert(position, newnode)
    self._populate_symboltable(newnode)


def validate_address(object: AstNode, attrib: attr.Attribute, value: Optional[int]):
    """Attribute validator for addresses; None is accepted as "no address".

    Raises:
        ParseError: if *object* is a block named "ZP", or if the value lies
            outside $0200..$ffff.
    """
    if value is None:
        return
    is_zeropage_block = isinstance(object, Block) and object.name == "ZP"
    if is_zeropage_block:
        raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), object.sourceref)
    if value < 0x0200 or value > 0xffff:
        raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), object.sourceref)


@attr.s(cmp=False, repr=False)
class Block(AstNode):
    """A block of the program, with its own scope and optionally a name and address."""
    scope = attr.ib(type=Scope)
    name = attr.ib(type=str, default=None)
    address = attr.ib(type=int, default=None, validator=validate_address)
    # labels handed out to unnamed blocks so far; shared by all Block instances
    _unnamed_block_labels = {}  # type: Dict[Block, str]

    def __attrs_post_init__(self):
        self.scope.name = self.name

    @property
    def label(self) -> str:
        """The block's name, or a generated label when the block is unnamed.

        A generated label is numbered by how many unnamed blocks were
        labelled before, and stays the same on later calls.
        """
        if self.name:
            return self.name
        known_labels = self._unnamed_block_labels
        if self not in known_labels:
            known_labels[self] = "il65_block_{:d}".format(len(known_labels))
        return known_labels[self]
def zeropage(self) -> Optional[Block]:
    """Return the zeropage block, or None if there isn't one.

    Only the first block of the module is examined: it is the zeropage
    block when its name is "ZP".  A module without any blocks yields None.
    """
    # next() needs a default here, otherwise a module without blocks
    # raises StopIteration.
    first_block = next(self.scope.filter_nodes(Block), None)
    if first_block is not None and first_block.name == "ZP":
        return first_block
    return None


def main(self) -> Optional[Block]:
    """Return the first block named "main", or None if there isn't one."""
    return next((block for block in self.scope.filter_nodes(Block) if block.name == "main"), None)
def to_hex(number: int) -> str:
    """Format *number* as hexadecimal with a '$' prefix.

    0..255 gives two digits ("$00".."$ff"), 256..65535 gives four digits
    ("$0100".."$ffff").

    Raises:
        ValueError: if number is None.
        OverflowError: if number is negative or larger than 65535.
    """
    if number is None:
        raise ValueError("number")
    if not 0 <= number < 0x10000:
        raise OverflowError(number)
    digits = 2 if number < 0x100 else 4
    return "${:0{}x}".format(number, digits)
import DataType, STRING_DATATYPES -from il65.compiler import ParseError -from il65.plylexer import SourceRef +import pytest +from il65.symbols import DataType, STRING_DATATYPES, to_hex +from il65.compile import ParseError +from il65.plylex import SourceRef def test_datatypes(): @@ -15,6 +16,19 @@ def test_sourceref(): def test_parseerror(): - p = ParseError("message", "source code", SourceRef("filename", 99, 42)) + p = ParseError("message", SourceRef("filename", 99, 42)) assert p.args == ("message", ) assert str(p) == "filename:99:42 message" + + +def test_to_hex(): + assert to_hex(0) == "$00" + assert to_hex(1) == "$01" + assert to_hex(255) == "$ff" + assert to_hex(256) == "$0100" + assert to_hex(20060) == "$4e5c" + assert to_hex(65535) == "$ffff" + with pytest.raises(OverflowError): + to_hex(-1) + with pytest.raises(OverflowError): + to_hex(65536) diff --git a/tests/test_optimizer.py b/tests/test_optimizer.py index e40175b10..53bf8316a 100644 --- a/tests/test_optimizer.py +++ b/tests/test_optimizer.py @@ -1,4 +1,4 @@ -from il65.optimizer import Optimizer +from il65.optimize import Optimizer def test_optimizer(): diff --git a/tests/test_parser.py b/tests/test_parser.py index d1307179b..da479b8ff 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,5 +1,5 @@ -from il65.plylexer import lexer, tokens, find_tok_column, literals, reserved -from il65.plyparser import parser, TokenFilter, Module, Subroutine, Block, Return +from il65.plylex import lexer, tokens, find_tok_column, literals, reserved +from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return def test_lexer_definitions(): diff --git a/testsource/dtypes.ill b/testsource/dtypes.ill index 9f26f52e1..3dc236a72 100644 --- a/testsource/dtypes.ill +++ b/testsource/dtypes.ill @@ -13,7 +13,7 @@ ; you can NOT put subroutines in here (yet). 
} -~ ZP $0004 { +~ ZP { var zpvar1 var zpvar2 memory zpmem1 = $f0 diff --git a/todo.ill b/todo.ill index 2b1f930fd..32dea9694 100644 --- a/todo.ill +++ b/todo.ill @@ -1,10 +1,14 @@ -%output prg,basic +%output prg +%saveregisters %import c64lib %import mathlib +%address 22222 + + ~ main $4444 { - %saveregisters true, false + %saveregisters true const num = 2 @@ -186,7 +190,8 @@ loop : sub sub1 () -> () { - %saveregisters off + %saveregisters no + %breakpoint %breakpoint %breakpoint @@ -197,7 +202,7 @@ label: sub emptysub () -> () { - %saveregisters on + %saveregisters yes }