diff --git a/.gitignore b/.gitignore new file mode 100644 index 000000000..65fa3abd3 --- /dev/null +++ b/.gitignore @@ -0,0 +1,16 @@ +*.py[cod] +*.egg +*.egg-info +/MANIFEST +/.idea/ +.tox/ +/build/ +/dist/ +/output/ +.cache/ +.eggs/ +*.directory +*.prg +*.asm +*.labels.txt +.mypy_cache/ diff --git a/README.md b/README.md new file mode 100644 index 000000000..034e7c715 --- /dev/null +++ b/README.md @@ -0,0 +1,6 @@ +# IL65 - codename 'Sick' + +Intermediate Language for the 8-bit 6502/6510 microprocessors. +Mainly targeted at the Commodore-64, but should be system independent. + +Work in progress. diff --git a/il65/__init__.py b/il65/__init__.py new file mode 100644 index 000000000..f688bb4c8 --- /dev/null +++ b/il65/__init__.py @@ -0,0 +1,6 @@ +""" +Intermediate Language for 6502/6510 microprocessors + +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" diff --git a/il65/__main__.py b/il65/__main__.py new file mode 100644 index 000000000..1cc918a24 --- /dev/null +++ b/il65/__main__.py @@ -0,0 +1,9 @@ +""" +Intermediate Language for 6502/6510 microprocessors + +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +from . import il65 +il65.main() diff --git a/il65/astparse.py b/il65/astparse.py new file mode 100644 index 000000000..5badd18cb --- /dev/null +++ b/il65/astparse.py @@ -0,0 +1,206 @@ +""" +Intermediate Language for 6502/6510 microprocessors +This is the expression parser/evaluator. 
class ParseError(Exception):
    """Raised when a line of source text cannot be parsed or evaluated.

    Attributes:
        sourceref: location object of the offending text; ``str()`` of it is
            used as the prefix of the rendered error message.
        msg: description of the problem.
        text: the source text that was being parsed.
    """

    def __init__(self, message: str, text: str, sourceref: "SourceRef") -> None:
        # Initialise Exception as well, so ``args`` and ``repr()`` carry the
        # message instead of being empty.
        super().__init__(message)
        self.sourceref = sourceref
        self.msg = message
        self.text = text

    def __str__(self):
        return "{} {:s}".format(self.sourceref, self.msg)


class SourceLine:
    """One stripped line of source text together with its source location."""

    def __init__(self, text: str, sourceref: "SourceRef") -> None:
        self.sourceref = sourceref
        self.text = text.strip()

    def to_error(self, message: str) -> ParseError:
        """Build (but do not raise) a ParseError pointing at this line."""
        return ParseError(message, self.text, self.sourceref)

    def preprocess(self) -> str:
        """Rewrite the line into text that Python's ``ast`` module can parse.

        Outside of string literals the following rewrites are applied:
            $d020       ->  0xd020
            %101001     ->  0b101001
            #something  ->  __ptr@something   (matmult operator)

        A ``%`` is only treated as a binary prefix when it is directly
        followed by ``0`` or ``1``; a ``$`` only when followed by a hex digit.
        The returned text always ends with one extra space (the padding
        character that is used for the one-character lookahead).
        """
        # The padding guarantees that padded[i + 1] exists for every real
        # character, so a trailing '%' or '$' can no longer raise IndexError.
        padded = self.text + " "
        pieces = []
        open_quote = ""     # the quote character of the literal we are inside, if any
        for i, c in enumerate(padded):
            if open_quote:
                # Inside a literal only the matching quote ends it; the other
                # quote character is ordinary text (e.g. "it's").
                if c == open_quote:
                    open_quote = ""
                pieces.append(c)
                continue
            if c in ("'", '"'):
                open_quote = c
                pieces.append(c)
                continue
            if c == '%' and padded[i + 1] in "01":
                pieces.append("0b")
                continue
            if c == '$' and padded[i + 1] in "0123456789abcdefABCDEF":
                pieces.append("0x")
                continue
            if c == '#':
                if i > 0:
                    pieces.append(" ")
                pieces.append("__ptr@")
                continue
            pieces.append(c)
        return "".join(pieces)
def parse_expr_as_primitive(text: str, context: Optional[SymbolTable], sourceref: SourceRef, *,
                            minimum: float = FLOAT_MAX_NEGATIVE, maximum: float = FLOAT_MAX_POSITIVE) -> PrimitiveType:
    """Parse and evaluate *text* as an expression yielding an int, float or str.

    The text is first rewritten by ``SourceLine.preprocess`` and then
    evaluated by ``ExpressionTransformer``.

    Args:
        text: the expression source text.
        context: symbol table used to resolve names, or None.
        sourceref: location of the text; ``sourceref.file`` is passed to
            ``ast.parse`` as the file name.
        minimum: lowest numeric result that is accepted (inclusive).
        maximum: highest numeric result that is accepted (inclusive).

    Returns:
        The evaluated value; a bool result is returned as int.

    Raises:
        ParseError: on a syntax error, a numeric result outside
            ``minimum..maximum``, or a result that is not int/float/str.
        TypeError: if ``ast.parse`` does not return an ``ast.Expression``.
    """
    src = SourceLine(text, sourceref)
    text = src.preprocess()
    try:
        node = ast.parse(text, sourceref.file, mode="eval")
    except SyntaxError as x:
        # keep the original SyntaxError attached as the cause
        raise src.to_error(str(x)) from x
    if not isinstance(node, ast.Expression):
        raise TypeError("ast.Expression expected")
    result = ExpressionTransformer(src, context).evaluate(node)
    if isinstance(result, bool):
        return int(result)
    if isinstance(result, (int, float)):
        if minimum <= result <= maximum:
            return result
        # Report the direction in which the range was violated; values that
        # are not below the minimum (including NaN) keep the old message.
        raise src.to_error("number too small" if result < minimum else "number too large")
    if isinstance(result, str):
        return result
    raise src.to_error("int or float or string expected, not " + type(result).__name__)
class EvaluatingTransformer(ast.NodeTransformer):
    """Node transformer that can also compile and evaluate the transformed tree."""

    def __init__(self, src: "SourceLine", context: "Optional[SymbolTable]") -> None:
        super().__init__()
        self.src = src
        self.context = context

    def error(self, message: str, column: int = 0) -> "ParseError":
        """Build a ParseError for the current line, optionally at a specific column."""
        if column:
            ref = self.src.sourceref.copy()
            ref.column = column
        else:
            ref = self.src.sourceref
        return ParseError(message, self.src.text, ref)

    def evaluate(self, node: ast.Expression) -> "PrimitiveType":
        """Transform, compile and evaluate *node*; a bool result is returned as int.

        Any exception raised during evaluation is re-raised as the error
        produced by ``self.src.to_error`` with the original exception as cause.
        """
        node = self.visit(node)
        code = compile(node, self.src.sourceref.file, mode="eval")
        # NOTE(review): eval() runs whatever the expression text contains. With a
        # context, globals is None so this module's globals (builtins included)
        # are visible to the expression; without one, builtins are removed.
        # Do not feed untrusted source text through this.
        if self.context:
            eval_globals = None
            eval_locals = self.context.as_eval_dict()
        else:
            eval_globals = {"__builtins__": {}}
            eval_locals = None
        try:
            result = eval(code, eval_globals, eval_locals)
        except Exception as x:
            raise self.src.to_error(str(x)) from x
        if type(result) is bool:
            return int(result)
        return result


class ExpressionTransformer(EvaluatingTransformer):
    """Rewrites il65 expression constructs into plain Python AST nodes.

    Handles the ``true``/``false`` names, folds unary +/- on numeric literals
    and replaces ``__ptr@name`` (the preprocessed ``#name``) by the address
    that the symbol table reports for that name.
    """

    @staticmethod
    def _number_of(node: ast.AST):
        """Return the value of an int/float/complex literal node, else None."""
        if isinstance(node, ast.Constant):
            value = node.value
        elif type(node).__name__ == "Num":
            # parsers before Python 3.8 produce ast.Num; matched by name so the
            # removed ast.Num attribute is never touched on newer versions
            value = node.n
        else:
            return None
        if isinstance(value, bool) or not isinstance(value, (int, float, complex)):
            return None
        return value

    def _dotted_name_from_attr(self, node: ast.Attribute) -> str:
        """Turn a nested attribute access such as ``a.b.c`` back into its dotted name."""
        if isinstance(node.value, ast.Name):
            return node.value.id + '.' + node.attr
        if isinstance(node.value, ast.Attribute):
            return self._dotted_name_from_attr(node.value) + '.' + node.attr
        raise self.error("dotted name error")

    def visit_Name(self, node: ast.Name):
        # convert true/false names to True/False constants
        if node.id == "true":
            return ast.copy_location(ast.Constant(True), node)
        if node.id == "false":
            return ast.copy_location(ast.Constant(False), node)
        return node

    def visit_UnaryOp(self, node):
        # only unary + and - applied directly to a numeric literal are accepted
        value = self._number_of(node.operand)
        if value is None:
            raise self.error("expected numeric operand for unary operator")
        if isinstance(node.op, ast.USub):
            value = -value
        elif not isinstance(node.op, ast.UAdd):
            raise self.error("expected unary + or -")
        return ast.copy_location(ast.Constant(value), node)

    def visit_BinOp(self, node):
        node = self.generic_visit(node)
        if not isinstance(node.op, ast.MatMult):
            return node
        if not (isinstance(node.left, ast.Name) and node.left.id == "__ptr"):
            raise self.error("invalid MatMult/Pointer node in AST")
        if isinstance(node.right, ast.Attribute):
            symbolname = self._dotted_name_from_attr(node.right)
        elif isinstance(node.right, ast.Name):
            symbolname = node.right.id
        else:
            raise self.error("can only take address of a named variable")
        if self.context is None:
            # without a symbol table there is nothing to look the name up in
            raise self.error("cannot take address of '" + symbolname + "' without a symbol table")
        try:
            address = self.context.get_address(symbolname)
        except SymbolError as x:
            raise self.error(str(x)) from x
        return ast.copy_location(ast.Constant(address), node)
/usr/bin/env python3 + +""" +Intermediate Language for 6502/6510 microprocessors, codename 'Sick' +This is the main program and assembly code generator (from the parse tree) + +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +import os +import io +import math +import datetime +import subprocess +import contextlib +import argparse +from functools import partial +from typing import TextIO, Set, Union +from .preprocess import PreprocessingParser +from .parse import ProgramFormat, Parser, ParseResult, Optimizer +from .symbols import Zeropage, DataType, VariableDef, REGISTER_WORDS, FLOAT_MAX_NEGATIVE, FLOAT_MAX_POSITIVE + + +class CodeError(Exception): + pass + + +class CodeGenerator: + def __init__(self, parsed: ParseResult) -> None: + self.parsed = parsed + self.generated_code = io.StringIO() + self.p = partial(print, file=self.generated_code) + self.previous_stmt_was_assignment = False + self.cur_block = None # type: ParseResult.Block + + def generate(self) -> None: + self.sanitycheck() + self.header() + self.initialize_variables() + self.blocks() + self.footer() + + def sanitycheck(self) -> None: + # duplicate block names? + all_blocknames = [b.name for b in self.parsed.blocks if b.name] + unique_blocknames = set(all_blocknames) + if len(all_blocknames) != len(unique_blocknames): + for name in unique_blocknames: + all_blocknames.remove(name) + raise CodeError("there are duplicate block names", all_blocknames) + # ZP block contains no code? 
@staticmethod
def to_hex(number: int) -> str:
    """Format *number* as a ``$``-prefixed hexadecimal literal.

    0..255      -> "$00".."$ff"
    256..65535  -> "$0100".."$ffff"

    Raises:
        OverflowError: if the number is negative or larger than 65535.
    """
    if 0 <= number < 0x100:
        return "${:02x}".format(number)
    # The explicit lower bound keeps negative numbers out of this branch;
    # they used to be formatted as e.g. "$-001".
    if 0x100 <= number < 0x10000:
        return "${:04x}".format(number)
    raise OverflowError(number)
def initialize_variables(self) -> None:
    """Emit the startup code: optional zero page backup, ZP variable init, jump to main.

    When the program both clobbers and restores the zero page, locations
    $02..$ff are copied to ``_il65_zp_backup`` first, ``main.start`` is called
    with ``jsr`` and the zero page is copied back afterwards. Otherwise the
    code simply jumps to ``main.start``.

    Raises:
        TypeError: if a float variable is allocated in the ZP block.
        CodeError: if there is no block named "main".
    """
    must_save_zp = self.parsed.clobberzp and self.parsed.restorezp
    if must_save_zp:
        self.p("; save zp")
        self.p("\t\tsei")
        self.p("\t\tldx #2")
        self.p("-\t\tlda $00,x")
        self.p("\t\tsta _il65_zp_backup-2,x")
        self.p("\t\tinx")
        self.p("\t\tbne -")

    # Only the vars from the ZeroPage need to be initialized here,
    # the vars in all other blocks are just defined and pre-filled there.
    zpblocks = [b for b in self.parsed.blocks if b.name == "ZP"]
    if zpblocks:
        assert len(zpblocks) == 1
        zpblock = zpblocks[0]
        vars_to_init = [v for v in zpblock.symbols.iter_variables()
                        if v.allocate and v.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT)]
        # @todo optimize sort order (sort on value first, then type, then blockname, then address/name)
        if vars_to_init:
            self.p("; init zp vars")
            self.p("\t\tlda #0\n\t\tldx #0")
            # Track what A and X hold separately. A single "previous value"
            # cannot tell that X still has the high byte of an earlier word
            # after a byte with the same numeric value was loaded into A.
            a_value = 0
            x_value = 0
            for variable in vars_to_init:
                vname = zpblock.label + "." + variable.name
                vvalue = variable.value
                if variable.type == DataType.BYTE:
                    if vvalue != a_value:
                        self.p("\t\tlda #${:02x}".format(vvalue))
                        a_value = vvalue
                    self.p("\t\tsta {:s}".format(vname))
                elif variable.type == DataType.WORD:
                    lo, hi = vvalue & 0xff, vvalue >> 8
                    if lo != a_value or hi != x_value:
                        self.p("\t\tlda #<${:04x}".format(vvalue))
                        self.p("\t\tldx #>${:04x}".format(vvalue))
                        a_value, x_value = lo, hi
                    self.p("\t\tsta {:s}".format(vname))
                    self.p("\t\tstx {:s}+1".format(vname))
                elif variable.type == DataType.FLOAT:
                    raise TypeError("floats cannot be stored in the zp")
            self.p("; end init zp vars")
        else:
            self.p("\t\t; there are no zp vars to initialize")
    else:
        self.p("\t\t; there is no zp block to initialize")

    main_labels = [b.label for b in self.parsed.blocks if b.name == "main"]
    if not main_labels:
        raise CodeError("there is no 'main' block to start the program from")
    main_block_label = main_labels[0]
    if must_save_zp:
        self.p("\t\tjsr {:s}.start\t\t; call user code".format(main_block_label))
        self.p("; restore zp")
        self.p("\t\tcld")
        self.p("\t\tphp\n\t\tpha\n\t\ttxa\n\t\tpha\n\t\tsei")
        self.p("\t\tldx #2")
        self.p("-\t\tlda _il65_zp_backup-2,x")
        self.p("\t\tsta $00,x")
        self.p("\t\tinx")
        self.p("\t\tbne -")
        self.p("\t\tcli\n\t\tpla\n\t\ttax\n\t\tpla\n\t\tplp")
        self.p("\t\trts")
        self.p("_il65_zp_backup\t\t.fill 254, 0")
    else:
        self.p("\t\tjmp {:s}.start\t\t; call user code".format(main_block_label))
{:d}\n".format(zpblock.sourceref.file, zpblock.sourceref.line)) + self.p("{:s}\t.proc\n".format(zpblock.label)) + self.generate_block_vars(zpblock) + self.p("\t.pend\n") + # make sure the main.start routine clears the decimal and carry flags as first steps + for block in self.parsed.blocks: + if block.name == "main": + statements = list(block.statements) + for index, stmt in enumerate(statements): + if isinstance(stmt, ParseResult.Label) and stmt.name == "start": + asmlines = [ + "\t\tcld\t\t\t; clear decimal flag", + "\t\tclc\t\t\t; clear carry flag" + ] + statements.insert(index+1, ParseResult.InlineAsm(0, asmlines)) + break + block.statements = statements + # generate + for block in sorted(self.parsed.blocks, key=lambda b: b.address): + if block.name == "ZP": + continue # zeropage block is already processed + self.cur_block = block + self.p("\n; ---- next block: '{:s}' ----\t\t; src l. {:d}\n".format(block.sourceref.file, block.sourceref.line)) + if block.address: + self.p(".cerror * > ${0:04x}, 'block address overlaps by ', *-${0:04x},' bytes'".format(block.address)) + self.p("* = ${:04x}".format(block.address)) + self.p("{:s}\t.proc\n".format(block.label)) + self.generate_block_vars(block) + subroutines = list(block.symbols.iter_subroutines()) + if subroutines: + self.p("\n; external subroutines") + for subdef in subroutines: + self.p("\t\t{:s} = {:s}".format(subdef.name, self.to_hex(subdef.address))) + self.p("; end external subroutines") + for stmt in block.statements: + self.generate_statement(stmt) + self.p("\t.pend\n") + + def generate_block_vars(self, block: ParseResult.Block) -> None: + mem_vars = [vi for vi in block.symbols.iter_variables() if not vi.allocate and not vi.register] + if mem_vars: + self.p("; memory mapped variables") + for vardef in mem_vars: + # create a definition for variables at a specific place in memory (memory-mapped) + if vardef.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT): + self.p("\t\t{:s} = {:s}\t; 
{:s}".format(vardef.name, self.to_hex(vardef.address), vardef.type.name.lower())) + elif vardef.type == DataType.BYTEARRAY: + self.p("\t\t{:s} = {:s}\t; array of {:d} bytes".format(vardef.name, self.to_hex(vardef.address), vardef.length)) + elif vardef.type == DataType.WORDARRAY: + self.p("\t\t{:s} = {:s}\t; array of {:d} words".format(vardef.name, self.to_hex(vardef.address), vardef.length)) + elif vardef.type == DataType.MATRIX: + self.p("\t\t{:s} = {:s}\t; matrix {:d} by {:d} = {:d} bytes" + .format(vardef.name, self.to_hex(vardef.address), vardef.matrixsize[0], vardef.matrixsize[1], vardef.length)) + else: + raise ValueError("invalid var type") + non_mem_vars = [vi for vi in block.symbols.iter_variables() if vi.allocate] + if non_mem_vars: + self.p("; normal variables") + for vardef in non_mem_vars: + # create a definition for a variable that takes up space and will be initialized at startup + if vardef.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT): + if vardef.address: + assert block.name == "ZP", "only ZP-variables can be put on an address" + self.p("\t\t{:s} = {:s}".format(vardef.name, self.to_hex(vardef.address))) + else: + if vardef.type == DataType.BYTE: + self.p("{:s}\t\t.byte {:s}".format(vardef.name, self.to_hex(int(vardef.value)))) + elif vardef.type == DataType.WORD: + self.p("{:s}\t\t.word {:s}".format(vardef.name, self.to_hex(int(vardef.value)))) + elif vardef.type == DataType.FLOAT: + self.p("{:s}\t\t.byte ${:02x}, ${:02x}, ${:02x}, ${:02x}, ${:02x}" + .format(vardef.name, *self.to_mflpt5(float(vardef.value)))) + else: + raise TypeError("weird datatype") + elif vardef.type in (DataType.BYTEARRAY, DataType.WORDARRAY): + if vardef.address: + raise CodeError("array or wordarray vars must not have address; will be allocated by assembler") + if vardef.type == DataType.BYTEARRAY: + self.p("{:s}\t\t.fill {:d}, ${:02x}".format(vardef.name, vardef.length, vardef.value or 0)) + elif vardef.type == DataType.WORDARRAY: + f_hi, f_lo = 
def generate_statement(self, stmt: ParseResult._Stmt) -> None:
    """Emit the assembly for one parsed statement.

    Afterwards ``previous_stmt_was_assignment`` records whether *stmt* was an
    assignment; this is now done for every statement kind, including calls
    that return early.

    Raises:
        CodeError: for statements or operands that cannot be generated.
        NotImplementedError: for increments/decrements by more than 1.
    """
    if isinstance(stmt, ParseResult.ReturnStmt):
        self._generate_return(stmt)
    elif isinstance(stmt, ParseResult.AssignmentStmt):
        self.generate_assignment(stmt)
    elif isinstance(stmt, ParseResult.Label):
        self.p("\n{:s}\t\t\t\t; src l. {:d}".format(stmt.name, stmt.lineno))
    elif isinstance(stmt, ParseResult.IncrDecrStmt):
        self._generate_incr_decr(stmt)
    elif isinstance(stmt, ParseResult.CallStmt):
        self._generate_call(stmt)
    elif isinstance(stmt, ParseResult.InlineAsm):
        self.p("\t\t; inline asm, src l. {:d}".format(stmt.lineno))
        for line in stmt.asmlines:
            self.p(line)
        self.p("\t\t; end inline asm, src l. {:d}".format(stmt.lineno))
    else:
        raise CodeError("unknown statement " + repr(stmt))
    self.previous_stmt_was_assignment = isinstance(stmt, ParseResult.AssignmentStmt)


def _generate_return(self, stmt) -> None:
    """Load the immediate return values into A/X/Y (where given) and emit ``rts``."""
    for reg, value in (("a", stmt.a), ("x", stmt.x), ("y", stmt.y)):
        if value:
            if not isinstance(value, ParseResult.IntegerValue):
                raise CodeError("can only return immediate values for now")  # XXX
            self.p("\t\tld{:s} #{:d}".format(reg, value.value))
    self.p("\t\trts")


def _generate_incr_decr(self, stmt) -> None:
    """Emit an increment or decrement by one of a register or a byte/word in memory."""
    if stmt.howmuch not in (-1, 1):
        if stmt.howmuch > 0:
            raise NotImplementedError("incr by > 1")  # XXX
        if stmt.howmuch < 0:
            raise NotImplementedError("decr by > 1")  # XXX
        return      # a change of 0 generates no code
    increment = stmt.howmuch == 1
    what = stmt.what
    if isinstance(what, ParseResult.RegisterValue):
        if what.register == 'A':
            # adc/sbc include the carry flag, so it has to be put in a known
            # state first or the result is off by one.
            self.p("\t\tclc\n\t\tadc #1" if increment else "\t\tsec\n\t\tsbc #1")
        elif what.register in ('X', 'Y'):
            self.p("\t\t{:s}{:s}".format("in" if increment else "de", what.register.lower()))
        else:
            raise CodeError("cannot in/decrement register " + str(what.register))
    elif isinstance(what, ParseResult.MemMappedValue):
        r_str = what.name or self.to_hex(what.address)
        if what.datatype == DataType.BYTE:
            self.p(("\t\tinc " if increment else "\t\tdec ") + r_str)
        elif what.datatype == DataType.WORD:
            if increment:
                # carry into the high byte when the low byte wrapped to 0
                self.p("\t\tinc " + r_str)
                self.p("\t\tbne +")
                self.p("\t\tinc {:s}+1".format(r_str))
                self.p("+")
            else:
                # borrow from the high byte when the low byte is 0 *before*
                # it is decremented; testing it needs A, which is preserved.
                with self.preserving_registers({'A'}):
                    self.p("\t\tlda " + r_str)
                    self.p("\t\tbne +")
                    self.p("\t\tdec {:s}+1".format(r_str))
                    self.p("+\t\tdec " + r_str)
        else:
            raise CodeError("cannot in/decrement memory of type " + str(what.datatype))
    else:
        raise CodeError("cannot in/decrement " + str(what))


def _generate_call(self, stmt) -> None:
    """Emit a subroutine call or goto, either direct or through a pointer.

    NOTE(review): the nesting of the branches below was reconstructed from a
    source whose indentation was lost - confirm against the original file.
    """
    is_indirect = False
    if stmt.call_label:
        call_target = stmt.call_label
        if stmt.call_module:
            call_target = stmt.call_module + "." + stmt.call_label
    elif stmt.address is not None:
        call_target = self.to_hex(stmt.address)
    else:
        assert stmt.indirect_pointer is not None
        if isinstance(stmt.indirect_pointer, int):
            call_target = self.to_hex(stmt.indirect_pointer)
        else:
            call_target = stmt.indirect_pointer
        is_indirect = True
    if stmt.subroutine:
        assert not is_indirect
        if stmt.subroutine.clobbered_registers:
            # @todo make this work with the separate assignment statements for the parameters.. :(
            clobbered = stmt.subroutine.clobbered_registers if stmt.preserve_regs else set()
            with self.preserving_registers(clobbered):
                self.p("\t\tjsr " + call_target)
            if stmt.is_goto:
                self.p("\t\trts")
            return
    if stmt.is_goto:
        if is_indirect:
            if call_target in REGISTER_WORDS:
                # store the register pair in the zp scratch word and jump through it
                self.p("\t\tst{:s} {:s}".format(call_target[0].lower(), self.to_hex(Zeropage.SCRATCH_B1)))
                self.p("\t\tst{:s} {:s}".format(call_target[1].lower(), self.to_hex(Zeropage.SCRATCH_B2)))
                self.p("\t\tjmp ({:s})".format(self.to_hex(Zeropage.SCRATCH_B1)))
            else:
                self.p("\t\tjmp ({:s})".format(call_target))
        else:
            self.p("\t\tjmp " + call_target)
        return
    preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set()
    with self.preserving_registers(preserve_regs):
        if not is_indirect:
            self.p("\t\tjsr " + call_target)
        elif call_target in REGISTER_WORDS:
            if stmt.preserve_regs:
                # cannot use zp scratch
                self.p("\t\tst{:s} ++".format(call_target[0].lower()))
                self.p("\t\tst{:s} +++".format(call_target[1].lower()))
                self.p("\t\tjsr +")
                self.p("\t\tjmp ++++")
                self.p("+\t\tjmp (+)")
                self.p("+\t\t.byte 0\t; lo")
                self.p("+\t\t.byte 0\t; hi")
                self.p("+")
            else:
                self.p("\t\tst{:s} {:s}".format(call_target[0].lower(), self.to_hex(Zeropage.SCRATCH_B1)))
                self.p("\t\tst{:s} {:s}".format(call_target[1].lower(), self.to_hex(Zeropage.SCRATCH_B2)))
                self.p("\t\tjsr +")
                self.p("\t\tjmp ++")
                self.p("+\t\tjmp ({:s})".format(self.to_hex(Zeropage.SCRATCH_B1)))
                self.p("+")
        else:
            # there is no indirect jsr: jsr to a trampoline that does the indirect jmp
            self.p("\t\tjsr +")
            self.p("\t\tjmp ++")
            self.p("+\t\tjmp ({:s})".format(call_target))
            self.p("+")
ParseResult.MemMappedValue): + for lv in stmt.leftvalues: + if isinstance(lv, ParseResult.RegisterValue): + self.generate_assign_mem_to_reg(lv.register, stmt.right) + elif isinstance(lv, ParseResult.MemMappedValue): + self.generate_assign_mem_to_mem(lv, stmt.right) + else: + raise CodeError("invalid assignment target (4)", str(stmt)) + elif isinstance(stmt.right, ParseResult.FloatValue): + mflpt = self.to_mflpt5(stmt.right.value) + for lv in stmt.leftvalues: + if isinstance(lv, ParseResult.MemMappedValue) and lv.datatype == DataType.FLOAT: + self.generate_store_immediate_float(lv, stmt.right.value, mflpt) + else: + raise CodeError("cannot assign float to ", str(lv)) + else: + raise CodeError("invalid assignment value type", str(stmt)) + + def generate_store_immediate_float(self, mmv: ParseResult.MemMappedValue, floatvalue: float, + mflpt: bytearray, emit_pha: bool=True) -> None: + target = mmv.name or self.to_hex(mmv.address) + if emit_pha: + self.p("\t\tpha\t\t\t; {:s} = {}".format(target, floatvalue)) + else: + self.p("\t\t\t\t\t; {:s} = {}".format(target, floatvalue)) + for num in range(5): + self.p("\t\tlda #${:02x}".format(mflpt[num])) + self.p("\t\tsta {:s}+{:d}".format(target, num)) + if emit_pha: + self.p("\t\tpla") + + def generate_assign_reg_to_memory(self, lv: ParseResult.MemMappedValue, r_register: str) -> None: + # Memory = Register + lv_string = lv.name or self.to_hex(lv.address) + if lv.datatype == DataType.BYTE: + if len(r_register) > 1: + raise CodeError("cannot assign register pair to single byte memory") + self.p("\t\tst{:s} {}".format(r_register.lower(), lv_string)) + elif lv.datatype == DataType.WORD: + if len(r_register) == 1: + self.p("\t\tst{:s} {}".format(r_register.lower(), lv_string)) # lsb + with self.preserving_registers({'A'}): + self.p("\t\tlda #0") + self.p("\t\tsta {:s}+1".format(lv_string)) # msb + else: + self.p("\t\tst{:s} {}".format(r_register[0].lower(), lv_string)) + self.p("\t\tst{:s} {}+1".format(r_register[1].lower(), 
def generate_assign_reg_to_reg(self, lv: ParseResult.RegisterValue, r_register: str) -> None:
    """Emit the code that copies register (pair) *r_register* into ``lv.register``.

    Assigning a single register to a pair clears the high register of the
    pair. Transfers between X and Y go through the zero page scratch byte
    ``Zeropage.SCRATCH_B1`` because the 6502 has no txy/tyx instructions.
    Nothing is emitted when source and target are the same.

    Raises:
        CodeError: for an unknown target register or an unsupported
            combination (such as a register pair into a single register).
    """
    target = lv.register
    if target == r_register:
        return
    if target not in ("A", "X", "Y") and target not in REGISTER_WORDS:
        raise CodeError("invalid register " + target)
    x_to_y = "\t\tstx ${0:02x}\n\t\tldy ${0:02x}".format(Zeropage.SCRATCH_B1)
    y_to_x = "\t\tsty ${0:02x}\n\t\tldx ${0:02x}".format(Zeropage.SCRATCH_B1)
    transfers = {
        # single register <- single register
        ("A", "X"): "\t\ttxa",
        ("A", "Y"): "\t\ttya",
        ("X", "A"): "\t\ttax",
        ("X", "Y"): y_to_x,
        ("Y", "A"): "\t\ttay",
        ("Y", "X"): x_to_y,
        # register pair <- single register, the hi byte becomes zero
        ("AX", "A"): "\t\tldx #0",
        ("AX", "X"): "\t\ttxa\n\t\tldx #0",
        ("AX", "Y"): "\t\ttya\n\t\tldx #0",
        ("AY", "A"): "\t\tldy #0",
        ("AY", "X"): "\t\ttxa\n\t\tldy #0",
        ("AY", "Y"): "\t\ttya\n\t\tldy #0",
        ("XY", "A"): "\t\ttax\n\t\tldy #0",
        ("XY", "X"): "\t\tldy #0",
        ("XY", "Y"): y_to_x + "\n\t\tldy #0",       # was 'tyx', which is not a 6502 opcode
        # register pair <- register pair
        ("AX", "AY"): y_to_x,
        ("AX", "XY"): "\t\ttxa\n" + y_to_x,
        ("AY", "AX"): x_to_y,
        ("AY", "XY"): "\t\ttxa",
        ("XY", "AX"): x_to_y + "\n\t\ttax",         # copy X to Y before A overwrites X
        ("XY", "AY"): "\t\ttax",
    }
    try:
        asm = transfers[(target, r_register)]
    except KeyError:
        raise CodeError("invalid register combination", target, r_register) from None
    self.p(asm)
@contextlib.contextmanager
def preserving_registers(self, registers: Set[str]):
    """Wrap the code emitted inside the ``with`` body in register save/restore code.

    The registers named in *registers* ('A', 'X', 'Y') are pushed on the
    stack before the body and pulled back afterwards. An empty set emits
    nothing extra.

    Except for the A-only case this clobbers the zero page scratch byte
    ``Zeropage.SCRATCH_B2`` and is therefore NOT safe to use in interrupts.
    See http://6502.org/tutorials/register_preservation.html
    """
    if not registers:
        yield
        return
    if registers == {'A'}:
        self.p("\t\tpha")
        yield
        self.p("\t\tpla")
        return
    scratch = "${:02x}".format(Zeropage.SCRATCH_B2)
    keep_a = 'A' in registers
    # X and Y can only be pushed via A, so park A in the scratch byte meanwhile
    self.p("\t\tsta " + scratch)
    if keep_a:
        self.p("\t\tpha")
    if 'X' in registers:
        self.p("\t\ttxa\n\t\tpha")
    if 'Y' in registers:
        self.p("\t\ttya\n\t\tpha")
    self.p("\t\tlda " + scratch)
    yield
    if not keep_a:
        # A is not being restored, so it holds whatever the wrapped code left
        # in it; park it again because the pulls below go through A.
        self.p("\t\tsta " + scratch)
    if 'Y' in registers:
        self.p("\t\tpla\n\t\ttay")
    if 'X' in registers:
        self.p("\t\tpla\n\t\ttax")
    if keep_a:
        self.p("\t\tpla")
    else:
        self.p("\t\tlda " + scratch)
def generate_assign_mem_to_mem(self, lv: ParseResult.MemMappedValue, rvalue: ParseResult.MemMappedValue) -> None:
    """Emit code that copies one memory mapped value into another, preserving A.

    Supported combinations: byte = byte, word = byte (zero extended) and
    word = word.  Words are stored low byte first, like the other word
    stores generated in this file.

    Raises:
        CodeError: when the combination of datatypes is not supported.
    """
    r_str = rvalue.name if rvalue.name else "${:x}".format(rvalue.address)
    if lv.datatype not in (DataType.BYTE, DataType.WORD):
        raise CodeError("can only assign to a memory mapped byte or word value for now")  # @todo
    if lv.datatype == DataType.BYTE:
        if rvalue.datatype != DataType.BYTE:
            raise CodeError("can only assign a byte to a byte")
    elif rvalue.datatype not in (DataType.BYTE, DataType.WORD):
        # @todo other mmapped types
        raise CodeError("can only assign a byte or word to a word")
    l_str = lv.name or self.to_hex(lv.address)
    with self.preserving_registers({'A'}):
        # low (or only) byte
        self.p("\t\tlda " + r_str)
        self.p("\t\tsta " + l_str)
        if lv.datatype == DataType.WORD:
            if rvalue.datatype == DataType.BYTE:
                # byte -> word: the hi byte becomes zero
                self.p("\t\tlda #0")
            else:
                self.p("\t\tlda {:s}+1".format(r_str))
            self.p("\t\tsta {:s}+1".format(l_str))
def generate_assign_integer_to_reg(self, l_register: str, rvalue: ParseResult.IntegerValue) -> None:
    """Emit code that loads an immediate integer (or named constant) into a register.

    A single register gets a plain immediate load, a register pair gets the
    low byte in its first and the high byte in its second register, and the
    pseudo register SC sets or clears the carry flag.

    Raises:
        CodeError: when ``l_register`` is not a register that can take an integer.
    """
    # prefer the symbolic name, otherwise spell the value out as a hex literal
    operand = rvalue.name or "${:x}".format(rvalue.value)
    if l_register in ('A', 'X', 'Y'):
        self.p("\t\tld{:s} #{:s}".format(l_register.lower(), operand))
        return
    if l_register in REGISTER_WORDS:
        # '<' selects the low byte and '>' the high byte of the operand
        for selector, reg in zip("<>", l_register):
            self.p("\t\tld{:s} #{:s}{:s}".format(reg.lower(), selector, operand))
        return
    if l_register == "SC":
        # set/clear S carry bit
        self.p("\t\tsec" if rvalue.value else "\t\tclc")
        return
    raise CodeError("invalid register in immediate integer assignment", l_register, rvalue.value)
def output_string(self, value: str, screencodes: bool = False) -> str:
    """Return ``value`` as an operand for the assembler's text directives.

    A single character with ``screencodes`` set becomes a quoted char literal,
    or its decimal code when it is not printable 7-bit ASCII.  Otherwise the
    result is a double-quoted string in which curly braces and characters that
    cannot be written literally are spliced in as decimal codes; without
    ``screencodes`` a few control characters use a named escape such as {lf}.
    """
    def is_plain(char: str) -> bool:
        return char.isprintable() and ord(char) < 128

    if screencodes and len(value) == 1:
        only = value[0]
        return "'{:s}'".format(only) if is_plain(only) else str(ord(only))

    named_escapes = {
        "\f": "{clear}",
        "\b": "{delete}",
        "\n": "{lf}",
        "\r": "{cr}",
        "\t": "{tab}",
    }
    pieces = ['"']
    for char in value:
        is_brace = char in "{}"     # braces would be mistaken for a named escape
        if not is_brace and is_plain(char):
            pieces.append(char)
        elif not is_brace and not screencodes and char in named_escapes:
            pieces.append(named_escapes[char])
        else:
            # close the string, emit the character code, reopen the string
            pieces.append('", {:d}, "'.format(ord(char)))
    pieces.append('"')
    return "".join(pieces)
subprocess.check_call(args) + except subprocess.CalledProcessError as x: + print("assembler failed with returncode", x.returncode) + + +def main() -> None: + description = "Compiler for IL65 language, code name 'Sick'" + ap = argparse.ArgumentParser(description=description) + ap.add_argument("-o", "--output", help="output directory") + ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile") + args = ap.parse_args() + assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm" + program_filename = os.path.splitext(args.sourcefile)[0] + ".prg" + if args.output: + os.makedirs(args.output, mode=0o700, exist_ok=True) + assembly_filename = os.path.join(args.output, os.path.split(assembly_filename)[1]) + program_filename = os.path.join(args.output, os.path.split(program_filename)[1]) + + print("\n" + description) + + pp = PreprocessingParser(args.sourcefile) + sourcelines, symbols = pp.preprocess() + symbols.print_table(True) + + p = Parser(args.sourcefile, args.output, sourcelines, ppsymbols=symbols) + parsed = p.parse() + if parsed: + opt = Optimizer(parsed) + parsed = opt.optimize() + cg = CodeGenerator(parsed) + cg.generate() + cg.optimize() + with open(assembly_filename, "wt") as out: + cg.write_assembly(out) + assembler = Assembler64Tass(parsed.format) + assembler.assemble(assembly_filename, program_filename) + print("Output file: ", program_filename) + print() diff --git a/il65/parse.py b/il65/parse.py new file mode 100644 index 000000000..7faf37c55 --- /dev/null +++ b/il65/parse.py @@ -0,0 +1,1343 @@ +""" +Intermediate Language for 6502/6510 microprocessors +This is the parser of the IL65 code, that generates a parse tree. 
+ +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +import re +import os +import shutil +import enum +from typing import Set, List, Tuple, Optional, Any, Dict, Union +from .astparse import ParseError, parse_expr_as_int, parse_expr_as_number, parse_expr_as_primitive,\ + parse_expr_as_string +from .symbols import SourceRef, SymbolTable, DataType, SymbolDefinition, SubroutineDef, LabelDef, \ + zeropage, check_value_in_range, coerce_value, char_to_bytevalue, \ + VariableDef, ConstantDef, SymbolError, STRING_DATATYPES, \ + REGISTER_SYMBOLS, REGISTER_WORDS, REGISTER_BYTES, RESERVED_NAMES + + +class ProgramFormat(enum.Enum): + PRG = "prg" + RAW = "raw" + + +class ParseResult: + def __init__(self, sourcefile: str) -> None: + self.format = ProgramFormat.RAW + self.with_sys = False + self.sourcefile = sourcefile + self.clobberzp = False + self.restorezp = False + self.start_address = 0 + self.blocks = [] # type: List['ParseResult.Block'] + + class Block: + _unnamed_block_labels = {} # type: Dict[ParseResult.Block, str] + + def __init__(self, name: str, sourceref: SourceRef, parent_scope: SymbolTable) -> None: + self.sourceref = sourceref + self.address = 0 + self.name = name + self.statements = [] # type: List[ParseResult._Stmt] + self.symbols = SymbolTable(name, parent_scope, self) + + @property + def label_names(self) -> Set[str]: + return {symbol.name for symbol in self.symbols.iter_labels()} + + @property + def label(self) -> str: + if self.name: + return self.name + if self in self._unnamed_block_labels: + return self._unnamed_block_labels[self] + label = "il65_block_{:d}".format(len(self._unnamed_block_labels)) + self._unnamed_block_labels[self] = label + return label + + def lookup(self, dottedname: str) -> Tuple[Optional['ParseResult.Block'], Optional[Union[SymbolDefinition, SymbolTable]]]: + try: + scope, result = self.symbols.lookup(dottedname) + return scope.owning_block, result + except (SymbolError, LookupError): + return 
None, None + + def flatten_statement_list(self) -> None: + if all(isinstance(stmt, ParseResult._Stmt) for stmt in self.statements): + # this is the common case + return + statements = [] + for stmt in self.statements: + if isinstance(stmt, (tuple, list)): + statements.extend(stmt) + else: + assert isinstance(stmt, ParseResult._Stmt) + statements.append(stmt) + self.statements = statements + + class Value: + def __init__(self, datatype: DataType, name: str=None, constant: bool=False) -> None: + self.datatype = datatype + self.name = name + self.constant = constant + + def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: + if self.constant: + return False, "cannot assign to a constant" + return False, "incompatible value for assignment" + + class PlaceholderSymbol(Value): + def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: + return True, "" + + def __str__(self): + return "".format(self.name) + + class IntegerValue(Value): + def __init__(self, value: Optional[int], *, datatype: DataType=None, name: str=None) -> None: + if type(value) is int: + if datatype is None: + if 0 <= value < 0x100: + datatype = DataType.BYTE + elif value < 0x10000: + datatype = DataType.WORD + else: + raise OverflowError("value too big: ${:x}".format(value)) + else: + faultreason = check_value_in_range(datatype, "", 1, value) + if faultreason: + raise OverflowError(faultreason) + super().__init__(datatype, name, True) + self.value = value + elif value is None: + if not name: + raise ValueError("when integer value is not given, the name symbol should be speicified") + super().__init__(datatype, name, True) + self.value = None + else: + raise TypeError("invalid data type") + + def __hash__(self): + return hash((self.datatype, self.value, self.name)) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, ParseResult.IntegerValue): + return NotImplemented + elif self is other: + return True + else: + return other.datatype == 
self.datatype and other.value == self.value and other.name == self.name + + def __str__(self): + return "".format(self.value, self.name) + + class FloatValue(Value): + def __init__(self, value: float, name: str=None) -> None: + if type(value) is float: + super().__init__(DataType.FLOAT, name, True) + self.value = value + else: + raise TypeError("invalid data type") + + def __hash__(self): + return hash((self.datatype, self.value, self.name)) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, ParseResult.FloatValue): + return NotImplemented + elif self is other: + return True + else: + return other.datatype == self.datatype and other.value == self.value and other.name == self.name + + def __str__(self): + return "".format(self.value, self.name) + + class StringValue(Value): + def __init__(self, value: str, name: str=None, constant: bool=False) -> None: + super().__init__(DataType.STRING, name, constant) + self.value = value + + def __hash__(self): + return hash((self.datatype, self.value, self.name)) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, ParseResult.StringValue): + return NotImplemented + elif self is other: + return True + else: + return other.datatype == self.datatype and other.value == self.value and other.name == self.name + + def __str__(self): + return "".format(self.value, self.name, self.constant) + + class RegisterValue(Value): + def __init__(self, register: str, datatype: DataType, name: str=None) -> None: + assert datatype in (DataType.BYTE, DataType.WORD) + assert register in REGISTER_SYMBOLS + super().__init__(datatype, name, False) + self.register = register + + def __hash__(self): + return hash((self.datatype, self.register, self.name)) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, ParseResult.RegisterValue): + return NotImplemented + elif self is other: + return True + else: + return other.datatype == self.datatype and other.register == self.register and other.name == self.name + 
+ def __str__(self): + return "".format(self.register, self.datatype, self.name) + + def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: + if self.register == "SC": + if isinstance(other, ParseResult.IntegerValue) and other.value in (0, 1): + return True, "" + return False, "can only assign an integer constant value of 0 or 1 to SC" + if self.constant: + return False, "cannot assign to a constant" + if isinstance(other, ParseResult.RegisterValue) and len(self.register) < len(other.register): + return False, "register size mismatch" + if isinstance(other, ParseResult.StringValue) and self.register in REGISTER_BYTES: + return False, "string address requires 16 bits combined register" + if isinstance(other, (ParseResult.IntegerValue, ParseResult.FloatValue)): + range_error = check_value_in_range(self.datatype, self.register, 1, other.value) + if range_error: + return False, range_error + return True, "" + if isinstance(other, ParseResult.PlaceholderSymbol): + return True, "" + if self.datatype == DataType.BYTE: + if other.datatype != DataType.BYTE: + return False, "(unsigned) byte required" + return True, "" + if self.datatype == DataType.WORD: + if other.datatype in (DataType.BYTE, DataType.WORD) or other.datatype in STRING_DATATYPES: + return True, "" + return False, "(unsigned) byte, word or string required" + return False, "incompatible value for assignment" + + class MemMappedValue(Value): + def __init__(self, address: Optional[int], datatype: DataType, length: int, name: str=None, constant: bool=False) -> None: + super().__init__(datatype, name, constant) + self.address = address + self.length = length + + def __hash__(self): + return hash((self.datatype, self.address, self.length, self.name)) + + def __eq__(self, other: Any) -> bool: + if not isinstance(other, ParseResult.MemMappedValue): + return NotImplemented + elif self is other: + return True + else: + return other.datatype == self.datatype and other.address == self.address and \ + 
other.length == self.length and other.name == self.name + + def __str__(self): + addr = "" if self.address is None else "${:04x}".format(self.address) + return ""\ + .format(addr, self.datatype, self.length, self.name, self.constant) + + def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: + if self.constant: + return False, "cannot assign to a constant" + if isinstance(other, ParseResult.PlaceholderSymbol): + return True, "" + if self.datatype == DataType.BYTE: + if isinstance(other, (ParseResult.IntegerValue, ParseResult.RegisterValue, ParseResult.MemMappedValue)): + if other.datatype == DataType.BYTE: + return True, "" + return False, "(unsigned) byte required" + elif isinstance(other, ParseResult.FloatValue): + range_error = check_value_in_range(self.datatype, "", 1, other.value) + if range_error: + return False, range_error + return True, "" + else: + return False, "(unsigned) byte required" + elif self.datatype in (DataType.WORD, DataType.FLOAT): + if isinstance(other, (ParseResult.IntegerValue, ParseResult.FloatValue)): + range_error = check_value_in_range(self.datatype, "", 1, other.value) + if range_error: + return False, range_error + return True, "" + elif isinstance(other, (ParseResult.RegisterValue, ParseResult.MemMappedValue)): + if other.datatype in (DataType.BYTE, DataType.WORD, DataType.FLOAT): + return True, "" + else: + return False, "byte or word or float required" + elif isinstance(other, ParseResult.StringValue): + if self.datatype == DataType.WORD: + return True, "" + return False, "string address requires 16 bits (a word)" + if self.datatype == DataType.BYTE: + return False, "(unsigned) byte required" + if self.datatype == DataType.WORD: + return False, "(unsigned) word required" + return False, "incompatible value for assignment" + + class _Stmt: + def resolve_symbol_references(self, parser: 'Parser') -> None: # @todo don't need this when using ppsymbols? 
+ pass + + class Label(_Stmt): + def __init__(self, name: str, lineno: int) -> None: + self.name = name + self.lineno = lineno + + class AssignmentStmt(_Stmt): + def __init__(self, leftvalues: List['ParseResult.Value'], right: 'ParseResult.Value', lineno: int) -> None: + self.leftvalues = leftvalues + self.right = right + self.lineno = lineno + + def __str__(self): + return "".format(str(self.right), ",".join(str(lv) for lv in self.leftvalues)) + + def resolve_symbol_references(self, parser: 'Parser') -> None: + cur_block = parser.cur_block + if isinstance(self.right, ParseResult.PlaceholderSymbol): + value = parser.parse_expression(self.right.name, cur_block) + if isinstance(value, ParseResult.PlaceholderSymbol): + raise ParseError("cannot resolve rvalue symbol: " + self.right.name, "", cur_block.sourceref) + self.right = value + lv_resolved = [] + for lv in self.leftvalues: + if isinstance(lv, ParseResult.PlaceholderSymbol): + value = parser.parse_expression(lv.name, cur_block) + if isinstance(value, ParseResult.PlaceholderSymbol): + raise ParseError("cannot resolve lvalue symbol: " + lv.name, "", cur_block.sourceref) + lv_resolved.append(value) + else: + lv_resolved.append(lv) + self.leftvalues = lv_resolved + if any(isinstance(lv, ParseResult.PlaceholderSymbol) for lv in self.leftvalues) or \ + isinstance(self.right, ParseResult.PlaceholderSymbol): + raise ParseError("unresolved placeholders in assignment statement", "", cur_block.sourceref) + # check assignability again + for lv in self.leftvalues: + assignable, reason = lv.assignable_from(self.right) + if not assignable: + raise ParseError("cannot assign {0} to {1}; {2}".format(self.right, lv, reason), "", cur_block.sourceref) + + _immediate_string_vars = {} # type: Dict[str, Tuple[str, str]] + + def desugar_immediate_string(self, parser: 'Parser') -> None: + if self.right.name or not isinstance(self.right, ParseResult.StringValue): + return + if self.right.value in self._immediate_string_vars: + blockname, 
stringvar_name = self._immediate_string_vars[self.right.value] + if blockname: + self.right.name = blockname + "." + stringvar_name + else: + self.right.name = stringvar_name + else: + cur_block = parser.cur_block + stringvar_name = "il65_str_{:d}".format(id(self)) + cur_block.symbols.define_variable(stringvar_name, cur_block.sourceref, DataType.STRING, value=self.right.value) + self.right.name = stringvar_name + self._immediate_string_vars[self.right.value] = (cur_block.name, stringvar_name) + + class ReturnStmt(_Stmt): + def __init__(self, a: Optional['ParseResult.Value']=None, + x: Optional['ParseResult.Value']=None, + y: Optional['ParseResult.Value']=None) -> None: + self.a = a + self.x = x + self.y = y + + def resolve_symbol_references(self, parser: 'Parser') -> None: + if isinstance(self.a, ParseResult.PlaceholderSymbol) or \ + isinstance(self.x, ParseResult.PlaceholderSymbol) or \ + isinstance(self.y, ParseResult.PlaceholderSymbol): + cur_block = parser.cur_block + raise ParseError("unresolved placeholders in return statement", "", cur_block.sourceref) + + class IncrDecrStmt(_Stmt): + def __init__(self, what: 'ParseResult.Value', howmuch: int) -> None: + self.what = what + self.howmuch = howmuch + + def resolve_symbol_references(self, parser: 'Parser') -> None: + if isinstance(self.what, ParseResult.PlaceholderSymbol): + cur_block = parser.cur_block + value = parser.parse_expression(self.what.name, cur_block) + if isinstance(value, ParseResult.PlaceholderSymbol): + raise ParseError("cannot resolve symbol: " + self.what.name, "", cur_block.sourceref) + self.what = value + + class CallStmt(_Stmt): + def __init__(self, lineno: int, address: Optional[int]=None, unresolved: str=None, + arguments: List[Tuple[str, Any]]=None, is_goto: bool=False, + indirect_pointer: Optional[Union[int, str]]=None, preserve_regs: bool=True) -> None: + self.subroutine = None # type: SubroutineDef + self.unresolved = unresolved + self.is_goto = is_goto + self.preserve_regs = 
preserve_regs + self.call_module = "" + self.call_label = "" + self.lineno = lineno + self.arguments = arguments + self.address = address + self.indirect_pointer = indirect_pointer + if self.indirect_pointer: + assert self.subroutine is None and self.address is None + + def resolve_symbol_references(self, parser: 'Parser') -> None: + if self.unresolved: + cur_block = parser.cur_block + symblock, identifier = cur_block.lookup(self.unresolved) + if not identifier: + raise parser.PError("unknown symbol '{:s}'".format(self.unresolved), self.lineno) + if isinstance(identifier, SubroutineDef): + self.subroutine = identifier + if self.arguments is not None and len(self.arguments) != len(self.subroutine.parameters): + raise parser.PError("invalid number of arguments ({:d}, expected {:d})" + .format(len(self.arguments), len(self.subroutine.parameters)), self.lineno) + arguments = [] + for i, (argname, value) in enumerate(self.arguments or []): + pname, preg = self.subroutine.parameters[i] + if argname: + if argname != preg: + raise parser.PError("parameter mismatch ({:s}, expected {:s})".format(argname, preg), self.lineno) + else: + argname = preg + arguments.append((argname, value)) + self.arguments = arguments + elif isinstance(identifier, LabelDef): + pass + else: + raise parser.PError("invalid call target (should be label or address)", self.lineno) + if cur_block is symblock: + self.call_module, self.call_label = "", identifier.name + else: + self.call_module = symblock.label + self.call_label = identifier.name + self.unresolved = None + + def desugar_call_arguments(self, parser: 'Parser') -> List['ParseResult._Stmt']: + assert not self.unresolved + if not self.arguments: + return [self] + statements = [] # type: List[ParseResult._Stmt] + for name, value in self.arguments: + assert name is not None, "call argument should have a parameter name assigned" + assignment = parser.parse_assignment("{:s}={:s}".format(name, value)) + assignment.lineno = self.lineno + 
def load_source(self, filename: str) -> List[Tuple[int, str]]:
    """Read ``filename`` and return its significant lines as (line number, text) tuples.

    Empty lines and lines that only contain a ';' comment are dropped; on the
    remaining lines everything from the first ';' onwards and any trailing
    whitespace is removed.  Line numbers are 1-based and refer to the file,
    so they stay usable for error messages.
    """
    lines = []
    # Text mode already does universal newline translation; the old "U" mode
    # flag was removed in Python 3.11 and makes open() raise ValueError.
    with open(filename, "r") as source:
        for num, line in enumerate(source, start=1):
            stripped = line.strip()
            if not stripped or stripped.startswith(";"):
                continue
            lines.append((num, line.partition(";")[0].rstrip()))
    return lines
stack trace info in the event of parse errors + except Exception as x: + print("ERROR: internal parser error: ", x) + print(" file:", self.sourceref.file, "block:", self.cur_block.name, "line:", self.sourceref.line) + raise # XXX temporary solution to get stack trace info in the event of parse errors + + def parse_file(self) -> ParseResult: + print("\nparsing (pass 1)", self.sourceref.file) + self._parse_1() + print("\nparsing (pass 2)", self.sourceref.file) + self._parse_2() + return self.result + + def _parse_1(self) -> None: + self.parse_header() + zeropage.configure(self.result.clobberzp) + while True: + next_line = self.peek_next_line() + if next_line.lstrip().startswith("~"): + block = self.parse_block() + if block: + self.result.add_block(block) + elif next_line.lstrip().startswith("import"): + self.parse_import() + else: + break + line = self.next_line() + if line: + raise self.PError("invalid statement or characters, block expected") + if not self.parsing_import: + # check if we have a proper main block to contain the program's entry point + for block in self.result.blocks: + if block.name == "main": + if "start" not in block.label_names: + self.sourceref.line = block.sourceref.line + self.sourceref.column = 0 + raise self.PError("The 'main' block should contain the program entry point 'start'") + if not any(s for s in block.statements if isinstance(s, ParseResult.ReturnStmt)): + print("warning: {}: The 'main' block is lacking a return statement.".format(block.sourceref)) + break + else: + raise self.PError("A block named 'main' should be defined for the program's entry point 'start'") + + def _parse_2(self) -> None: + # parsing pass 2 + self.cur_block = None + self.sourceref.line = -1 + self.sourceref.column = 0 + for block in self.result.blocks: + self.cur_block = block + # resolve labels and names that were referencing unknown symbols + block.flatten_statement_list() + for index, stmt in enumerate(list(block.statements)): + 
def PError(self, message: str, lineno: Optional[int]=None) -> ParseError:
    """Create (not raise) a ParseError for ``message``, with source text attached.

    With an explicit ``lineno`` the text of that source line is looked up.
    Without it, the error refers to the line that is currently being parsed.
    """
    sourceline = ""
    if lineno:
        for num, text in self.lines:
            if num == lineno:
                sourceline = text.strip()
                break
    else:
        lineno = self.sourceref.line
        # next_line() can move the index past the last line, so clamp it
        self.cur_lineidx = min(self.cur_lineidx, len(self.lines) - 1)
        # Index 0 is a valid line.  A negative index means nothing was read yet
        # (or there are no lines at all) and must not wrap around to the end.
        if self.cur_lineidx >= 0:
            sourceline = self.lines[self.cur_lineidx][1].strip()
    return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno))
line.partition(" ") + arg = arg.lstrip() + self.result.with_sys = False + self.result.format = ProgramFormat.RAW + if arg == "raw": + pass + elif arg == "prg": + self.result.format = ProgramFormat.PRG + elif arg.replace(' ', '') == "prg,sys": + self.result.with_sys = True + self.result.format = ProgramFormat.PRG + else: + raise self.PError("invalid output format") + elif line.startswith("clobberzp"): + if self.result.clobberzp: + raise self.PError("multiple occurrences of 'clobberzp'") + self.result.clobberzp = True + _, _, arg = line.partition(" ") + arg = arg.lstrip() + if arg == "restore": + self.result.restorezp = True + elif arg == "": + pass + else: + raise self.PError("invalid arg for clobberzp") + elif line.startswith("address"): + if self.result.start_address: + raise self.PError("multiple occurrences of 'address'") + _, _, arg = line.partition(" ") + try: + self.result.start_address = parse_expr_as_int(arg, None, self.sourceref) + except ParseError: + raise self.PError("invalid address") + if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: + raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") + else: + # header parsing finished! 
+ self.prev_line() + if not self.result.start_address: + # set the proper default start address + if self.result.format == ProgramFormat.PRG: + self.result.start_address = 0x0801 # normal C-64 basic program start address + elif self.result.format == ProgramFormat.RAW: + self.result.start_address = 0xc000 # default start for raw assembly + if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: + raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") + return + + def parse_import(self) -> None: + line = self.next_line() + line = line.lstrip() + if not line.startswith("import"): + raise self.PError("expected import") + try: + _, arg = line.split(maxsplit=1) + except ValueError: + raise self.PError("invalid import statement") + if not arg.startswith('"') or not arg.endswith('"'): + raise self.PError("filename must be between quotes") + filename = arg[1:-1] + if not filename: + raise self.PError("invalid filename") + filename_at_source_location = os.path.join(os.path.split(self.sourceref.file)[0], filename) + filename_at_libs_location = os.path.join(os.getcwd(), "lib", filename) + candidates = [filename, + filename_at_source_location, + filename_at_libs_location, + filename+".ill", + filename_at_source_location+".ill", + filename_at_libs_location+".ill"] + for filename in candidates: + if os.path.isfile(filename): + print("importing", filename) + parser = self.create_import_parser(filename, self.outputdir) + result = parser.parse() + print("\ncontinuing", self.sourceref.file) + if result: + self.root_scope.merge_roots(parser.root_scope) + self.result.merge(result) + return + else: + raise self.PError("Error while parsing imported file") + raise self.PError("imported file not found") + + def create_import_parser(self, filename: str, outputdir: str) -> 'Parser': + return Parser(filename, outputdir, parsing_import=True) + + def parse_block(self) -> ParseResult.Block: + # 
first line contains block header "~ [name] [addr]" followed by a '{' + line = self.next_line() + line = line.lstrip() + if not line.startswith("~"): + raise self.PError("expected '~' (block)") + block_args = line[1:].split() + arg = "" + self.cur_block = ParseResult.Block("", self.sourceref, self.root_scope) + is_zp_block = False + while block_args: + arg = block_args.pop(0) + if arg.isidentifier(): + if arg.lower() == "zeropage" or arg in ("zp", "zP", "Zp"): + raise self.PError("zero page block should be named 'ZP'") + is_zp_block = arg == "ZP" + if arg in set(b.name for b in self.result.blocks): + orig = [b for b in self.result.blocks if b.name == arg][0] + if not is_zp_block: + raise self.PError("duplicate block name '{:s}', original definition at {}".format(arg, orig.sourceref)) + self.cur_block = orig # zero page block occurrences are merged + else: + self.cur_block = ParseResult.Block(arg, self.sourceref, self.root_scope) + try: + self.root_scope.define_scope(self.cur_block.symbols, self.sourceref) + except SymbolError as x: + raise self.PError(str(x)) + elif arg == "{": + break + elif arg.endswith("{"): + # when there is no whitespace before the { + block_args.insert(0, "{") + block_args.insert(0, arg[:-1]) + continue + else: + try: + block_address = parse_expr_as_int(arg, None, self.sourceref) + except ParseError: + raise self.PError("Invalid block address") + if block_address == 0 or (block_address < 0x0200 and not is_zp_block): + raise self.PError("block address must be >= $0200 (or omitted)") + if is_zp_block: + if block_address not in (0, 0x04): + raise self.PError("zero page block address must be $04 (or omittted)") + block_address = 0x04 + self.cur_block.address = block_address + if arg != "{": + line = self.peek_next_line() + if line != "{": + raise self.PError("expected '{' after block") + else: + self.next_line() + if self.cur_block.address: + print(" parsing block '{:s}' at ${:04x}".format(self.cur_block.name, self.cur_block.address)) + else: + 
print(" parsing block '{:s}'".format(self.cur_block.name)) + while True: + line = self.next_line() + unstripped_line = line + line = line.strip() + if line == "}": + if is_zp_block and any(b.name == "ZP" for b in self.result.blocks): + return None # we already have the ZP block + if not self.cur_block.name and not self.cur_block.address: + print("warning: {}: Ignoring block without name and address.".format(self.cur_block.sourceref)) + return None + return self.cur_block + if line.startswith("var"): + self.parse_var_def(line) + elif line.startswith("const"): + self.parse_const_def(line) + elif line.startswith("memory"): + self.parse_memory_def(line, is_zp_block) + elif line.startswith("subx"): + if is_zp_block: + raise self.PError("ZP block cannot contain subroutines") + self.parse_subx_def(line) + elif line.startswith(("asminclude", "asmbinary")): + if is_zp_block: + raise self.PError("ZP block cannot contain assembler directives") + self.cur_block.statements.append(self.parse_asminclude(line)) + elif line.startswith("asm"): + if is_zp_block: + raise self.PError("ZP block cannot contain code statements") + self.prev_line() + self.cur_block.statements.append(self.parse_asm()) + continue + elif unstripped_line.startswith((" ", "\t")): + if is_zp_block: + raise self.PError("ZP block cannot contain code statements") + self.cur_block.statements.append(self.parse_statement(line)) + continue + elif line: + if is_zp_block: + raise self.PError("ZP block cannot contain code labels") + self.parse_label(line) + else: + raise self.PError("missing } to close block from line " + str(self.cur_block.sourceref.line)) + + def parse_label(self, line: str) -> None: + label_line = line.split(maxsplit=1) + if str.isidentifier(label_line[0]): + labelname = label_line[0] + if labelname in self.cur_block.label_names: + raise self.PError("label already defined") + if labelname in self.cur_block.symbols: + raise self.PError("symbol already defined") + 
self.cur_block.symbols.define_label(labelname, self.sourceref) + self.cur_block.statements.append(ParseResult.Label(labelname, self.sourceref.line)) + if len(label_line) > 1: + rest = label_line[1] + self.cur_block.statements.append(self.parse_statement(rest)) + else: + raise self.PError("invalid label name") + + def parse_memory_def(self, line: str, is_zeropage: bool=False) -> None: + varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "memory") + memaddress = parse_expr_as_int(valuetext, self.cur_block.symbols, self.sourceref) + if is_zeropage and memaddress > 0xff: + raise self.PError("address must lie in zeropage $00-$ff") + try: + self.cur_block.symbols.define_variable(varname, self.sourceref, datatype, + length=length, address=memaddress, matrixsize=dimensions) + except SymbolError as x: + raise self.PError(str(x)) from x + + def parse_const_def(self, line: str) -> None: + varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "const") + if dimensions: + raise self.PError("cannot declare a constant matrix") + value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.sourceref) + _, value = coerce_value(self.sourceref, datatype, value) + try: + self.cur_block.symbols.define_constant(varname, self.sourceref, datatype, length=length, value=value) + except (ValueError, SymbolError) as x: + raise self.PError(str(x)) from x + + def parse_subx_def(self, line: str) -> None: + match = re.match(r"^subx\s+(?P\w+)\s+" + r"\((?P[\w\s:,]*)\)" + r"\s*->\s*" + r"\((?P[\w\s?,]*)\)\s*" + r"\s+=\s+(?P
\S*)\s*$", line) + if not match: + raise self.PError("invalid subx declaration") + name, parameterlist, resultlist, address_str = \ + match.group("name"), match.group("parameters"), match.group("results"), match.group("address") + parameters = [(match.group("name"), match.group("target")) + for match in re.finditer(r"(?:(?:(?P[\w]+)\s*:\s*)?(?P[\w]+))(?:,|$)", parameterlist)] + for _, regs in parameters: + if regs not in REGISTER_SYMBOLS: + raise self.PError("invalid register(s) in parameter or return values") + all_paramnames = [p[0] for p in parameters if p[0]] + if len(all_paramnames) != len(set(all_paramnames)): + raise self.PError("duplicates in parameter names") + results = {match.group("name") for match in re.finditer(r"\s*(?P(?:\w+)\??)\s*(?:,|$)", resultlist)} + try: + address = parse_expr_as_int(address_str, None, self.sourceref) + except ParseError: + raise self.PError("invalid subroutine address") + try: + self.cur_block.symbols.define_sub(name, self.sourceref, parameters, results, address) + except SymbolError as x: + raise self.PError(str(x)) from x + + def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: + if typestr == ".byte": + return DataType.BYTE, 1, None + elif typestr == ".word": + return DataType.WORD, 1, None + elif typestr == ".float": + return DataType.FLOAT, 1, None + elif typestr.endswith("text"): + if typestr == ".text": + return DataType.STRING, 0, None + elif typestr == ".ptext": + return DataType.STRING_P, 0, None + elif typestr == ".stext": + return DataType.STRING_S, 0, None + elif typestr == ".pstext": + return DataType.STRING_PS, 0, None + elif typestr.startswith(".array(") and typestr.endswith(")"): + return DataType.BYTEARRAY, self._size_from_arraydecl(typestr), None + elif typestr.startswith(".wordarray(") and typestr.endswith(")"): + return DataType.WORDARRAY, self._size_from_arraydecl(typestr), None + elif typestr.startswith(".matrix(") and typestr.endswith(")"): + dimensions = 
self._size_from_matrixdecl(typestr) + return DataType.MATRIX, dimensions[0] * dimensions[1], dimensions + raise self.PError("invalid data type: " + typestr) + + def parse_var_def(self, line: str) -> None: + varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "var", False) + value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.sourceref) + _, value = coerce_value(self.sourceref, datatype, value) + try: + self.cur_block.symbols.define_variable(varname, self.sourceref, datatype, + length=length, value=value, matrixsize=dimensions) + except (ValueError, SymbolError) as x: + raise self.PError(str(x)) from x + + def parse_def_common(self, line: str, what: str, value_required: bool=True) -> \ + Tuple[str, DataType, int, Optional[Tuple[int, int]], str]: + try: + vartext, valuetext = line.split("=", maxsplit=1) + except ValueError: + if '=' not in line: + if value_required: + raise self.PError("missing value assignment") + vartext, valuetext = line, "0" # unspecified value is '0' + else: + raise self.PError("invalid {:s} decl, '=' missing?".format(what)) + args = self.psplit(vartext) + if args[0] != what or len(args) < 2: + raise self.PError("invalid {:s} decl".format(what)) + if len(args) > 3 or valuetext.startswith('='): + raise self.PError("invalid {:s} decl, '=' missing?".format(what)) + if len(args) == 2: + args.insert(1, ".byte") # unspecified data type is ".byte" + if not args[1].startswith("."): + raise self.PError("invalid {:s} decl, type is missing".format(what)) + varname = args[2] + if not varname.isidentifier(): + raise self.PError("invalid {:s} name".format(what)) + if varname in RESERVED_NAMES: + raise self.PError("can't use a reserved name as {:s} name".format(what)) + datatype, length, matrix_dimensions = self.get_datatype(args[1]) + return varname, datatype, length, matrix_dimensions, valuetext + + def parse_statement(self, line: str) -> ParseResult._Stmt: + # check if we have a subroutine call using () syntax 
+ match = re.match(r"^(?P[\w\.]+)\s*(?P[!]?)\s*\((?P.*)\)\s*$", line) + if match: + subname = match.group("subname") + fcall = "f" if match.group("fcall") else "" + param_str = match.group("params") + # turn this into "[f]call subname parameters" so it will be parsed below + line = "{:s}call {:s} {:s}".format(fcall, subname, param_str) + if line.startswith("return"): + return self.parse_return(line) + elif line.endswith(("++", "--")): + incr = line.endswith("++") + what = self.parse_expression(line[:-2].rstrip()) + if isinstance(what, ParseResult.IntegerValue): + raise self.PError("cannot in/decrement a constant value") + return ParseResult.IncrDecrStmt(what, 1 if incr else -1) + elif line.startswith("call"): + return self.parse_call_or_go(line, "call") + elif line.startswith("fcall"): + return self.parse_call_or_go(line, "fcall") + elif line.startswith("go"): + return self.parse_call_or_go(line, "go") + else: + # perhaps it is an assignment statment + lhs, sep, rhs = line.partition("=") + if sep: + return self.parse_assignment(line) + raise self.PError("invalid statement") + + def parse_call_or_go(self, line: str, what: str) -> ParseResult.CallStmt: + args = line.split(maxsplit=2) + if len(args) == 2: + subname, argumentstr, = args[1], "" + arguments = None + elif len(args) == 3: + subname, argumentstr = args[1], args[2] + arguments = [] + for part in argumentstr.split(','): + pname, sep, pvalue = part.partition('=') + pname = pname.strip() + pvalue = pvalue.strip() + if sep: + arguments.append((pname, pvalue)) + else: + arguments.append((None, pname)) + else: + raise self.PError("invalid call/go arguments") + address = None + if subname[0] == '[' and subname[-1] == ']': + # indirect call to address in register pair or memory location + pointerstr = subname[1:-1].strip() + indirect_pointer = pointerstr # type: Union[int, str] + if pointerstr[0] == '#': + _, symbol = self.cur_block.lookup(pointerstr[1:]) + indirect_pointer = 
self.cur_block.symbols.get_address(pointerstr[1:]) + symboltype = getattr(symbol, "type", None) + if symboltype and symboltype != DataType.WORD: + raise self.PError("invalid call target (should contain 16-bit)") + else: + # the pointer should be a number or a + _, symbol = self.cur_block.lookup(pointerstr) + if isinstance(symbol, VariableDef): + if symbol.address is not None: + raise self.PError("invalid call target (should be label or address)") + if symbol.type != DataType.WORD: + raise self.PError("invalid call target (should be 16-bit address)") + if what == "go": + return ParseResult.CallStmt(self.sourceref.line, is_goto=True, indirect_pointer=indirect_pointer) + elif what == "call": + return ParseResult.CallStmt(self.sourceref.line, indirect_pointer=indirect_pointer) + elif what == "fcall": + return ParseResult.CallStmt(self.sourceref.line, indirect_pointer=indirect_pointer, preserve_regs=False) + else: + raise ValueError("invalid what") + else: + # subname can be a label, or an immediate address (but not #symbol - use subx for that) + if subname[0] == '#': + raise self.PError("to call a subroutine, use a subx definition instead") + else: + try: + address = self.parse_integer(subname) + subname = None + except ValueError: + pass + if what == "go": + return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, is_goto=True) + elif what == "call": + return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, arguments=arguments) + elif what == "fcall": + return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, arguments=arguments, preserve_regs=False) + else: + raise ValueError("invalid what") + + def parse_integer(self, text: str) -> int: + text = text.strip() + if text.startswith('$'): + return int(text[1:], 16) + if text.startswith('%'): + return int(text[1:], 2) + return int(text) + + def parse_assignment(self, line: str) -> ParseResult.AssignmentStmt: + # parses assigning a value to one or more 
targets + parts = line.split("=") + rhs = parts.pop() + l_values = [self.parse_expression(part) for part in parts] + if any(isinstance(lv, ParseResult.IntegerValue) for lv in l_values): + raise self.PError("can't have a constant as assignment target, did you mean [name] instead?") + r_value = self.parse_expression(rhs) + for lv in l_values: + assignable, reason = lv.assignable_from(r_value) + if not assignable: + raise self.PError("cannot assign {0} to {1}; {2}".format(r_value, lv, reason)) + if lv.datatype in (DataType.BYTE, DataType.WORD, DataType.MATRIX): + if isinstance(r_value, ParseResult.FloatValue): + truncated, value = coerce_value(self.sourceref, lv.datatype, r_value.value) + if truncated: + r_value = ParseResult.IntegerValue(int(value), datatype=lv.datatype, name=r_value.name) + return ParseResult.AssignmentStmt(l_values, r_value, self.sourceref.line) + + def parse_return(self, line: str) -> ParseResult.ReturnStmt: + parts = line.split(maxsplit=1) + if parts[0] != "return": + raise self.PError("invalid statement, return expected") + a = x = y = None + values = [] # type: List[str] + if len(parts) > 1: + values = parts[1].split(",") + if len(values) == 0: + return ParseResult.ReturnStmt() + else: + a = self.parse_expression(values[0]) if values[0] else None + if len(values) > 1: + x = self.parse_expression(values[1]) if values[1] else None + if len(values) > 2: + y = self.parse_expression(values[2]) if values[2] else None + if len(values) > 3: + raise self.PError("too many returnvalues") + return ParseResult.ReturnStmt(a, x, y) + + def parse_asm(self) -> ParseResult.InlineAsm: + line = self.next_line() + aline = line.split() + if not len(aline) == 2 or aline[0] != "asm" or aline[1] != "{": + raise self.PError("invalid asm start") + asmlines = [] # type: List[str] + while True: + line = self.next_line() + if line.strip() == "}": + return ParseResult.InlineAsm(self.sourceref.line, asmlines) + asmlines.append(line) + + def parse_asminclude(self, line: str) 
-> ParseResult.InlineAsm: + aline = line.split() + if len(aline) < 2: + raise self.PError("invalid asminclude or asmbinary statement") + filename = aline[1] + if not filename.startswith('"') or not filename.endswith('"'): + raise self.PError("filename must be between quotes") + filename = filename[1:-1] + if not filename: + raise self.PError("invalid filename") + filename_in_sourcedir = os.path.join(os.path.split(self.sourceref.file)[0], filename) + filename_in_output_location = os.path.join(self.outputdir, filename) + if not os.path.isfile(filename_in_sourcedir): + raise self.PError("included file not found") + print("copying included file to output location:", filename) + shutil.copy(filename_in_sourcedir, filename_in_output_location) + if aline[0] == "asminclude": + if len(aline) == 3: + scopename = aline[2] + lines = ['{:s}\t.binclude "{:s}"'.format(scopename, filename)] + else: + raise self.PError("invalid asminclude statement") + return ParseResult.InlineAsm(self.sourceref.line, lines) + elif aline[0] == "asmbinary": + if len(aline) == 4: + offset = parse_expr_as_int(aline[2], None, self.sourceref) + length = parse_expr_as_int(aline[3], None, self.sourceref) + lines = ['\t.binary "{:s}", ${:04x}, ${:04x}'.format(filename, offset, length)] + elif len(aline) == 3: + offset = parse_expr_as_int(aline[2], None, self.sourceref) + lines = ['\t.binary "{:s}", ${:04x}'.format(filename, offset)] + elif len(aline) == 2: + lines = ['\t.binary "{:s}"'.format(filename)] + else: + raise self.PError("invalid asmbinary statement") + return ParseResult.InlineAsm(self.sourceref.line, lines) + else: + raise self.PError("invalid statement") + + def parse_expression(self, text: str, cur_block: Optional[ParseResult.Block]=None) -> ParseResult.Value: + # parse an expression into whatever it is (primitive value, register, memory, register, etc) + cur_block = cur_block or self.cur_block + text = text.strip() + if not text: + raise self.PError("value expected") + if text[0] == '#': + # 
take the pointer (memory address) from the thing that follows this + expression = self.parse_expression(text[1:], cur_block) + if isinstance(expression, ParseResult.StringValue): + return expression + elif isinstance(expression, ParseResult.MemMappedValue): + return ParseResult.IntegerValue(expression.address, datatype=DataType.WORD, name=expression.name) + elif isinstance(expression, ParseResult.PlaceholderSymbol): + raise self.PError("cannot take the address of an unknown symbol") + else: + raise self.PError("cannot take the address of this type") + elif text[0] in "-.0123456789$%": + number = parse_expr_as_number(text, None, self.sourceref) + try: + if type(number) is int: + return ParseResult.IntegerValue(int(number)) + elif type(number) is float: + return ParseResult.FloatValue(number) + else: + raise TypeError("invalid number type") + except (ValueError, OverflowError) as ex: + raise self.PError(str(ex)) + elif text in REGISTER_WORDS: + return ParseResult.RegisterValue(text, DataType.WORD) + elif text in REGISTER_BYTES: + return ParseResult.RegisterValue(text, DataType.BYTE) + elif (text.startswith("'") and text.endswith("'")) or (text.startswith('"') and text.endswith('"')): + strvalue = parse_expr_as_string(text, None, self.sourceref) + if len(strvalue) == 1: + petscii_code = char_to_bytevalue(strvalue) + return ParseResult.IntegerValue(petscii_code) + return ParseResult.StringValue(strvalue) + elif text == "true": + return ParseResult.IntegerValue(1) + elif text == "false": + return ParseResult.IntegerValue(0) + elif self.is_identifier(text): + symblock, sym = cur_block.lookup(text) + if sym is None: + # symbols is not (yet) known, store a placeholder to resolve later in parse pass 2 + return ParseResult.PlaceholderSymbol(None, text) + elif isinstance(sym, (VariableDef, ConstantDef)): + constant = isinstance(sym, ConstantDef) + if cur_block is symblock: + symbolname = sym.name + else: + symbolname = "{:s}.{:s}".format(sym.blockname, sym.name) + if 
isinstance(sym, VariableDef) and sym.register: + return ParseResult.RegisterValue(sym.register, sym.type, name=symbolname) + elif sym.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT): + if isinstance(sym, ConstantDef): + symbolvalue = sym.value + else: + symbolvalue = sym.address + return ParseResult.MemMappedValue(symbolvalue, sym.type, sym.length, name=symbolname, constant=constant) # type:ignore + elif sym.type in STRING_DATATYPES: + return ParseResult.StringValue(sym.value, name=symbolname, constant=constant) # type: ignore + elif sym.type == DataType.MATRIX: + raise self.PError("cannot manipulate matrix directly, use one of the matrix procedures") + elif sym.type == DataType.BYTEARRAY or sym.type == DataType.WORDARRAY: + raise self.PError("cannot manipulate array directly, use one of the array procedures") + else: + raise self.PError("invalid symbol type (1)") + else: + raise self.PError("invalid symbol type (2)") + elif text.startswith('[') and text.endswith(']'): + num_or_name = text[1:-1].strip() + word_type = float_type = False + if num_or_name.endswith(".word"): + word_type = True + num_or_name = num_or_name[:-5] + elif num_or_name.endswith(".float"): + float_type = True + num_or_name = num_or_name[:-6] + if num_or_name.isidentifier(): + try: + sym = cur_block.symbols[num_or_name] # type: ignore + except KeyError: + raise self.PError("unknown symbol (2): " + num_or_name) + if isinstance(sym, ConstantDef): + if sym.type == DataType.BYTE and (word_type or float_type): + raise self.PError("byte value required") + elif sym.type == DataType.WORD and float_type: + raise self.PError("word value required") + if type(sym.value) is int: + return ParseResult.MemMappedValue(int(sym.value), sym.type, sym.length, sym.name) + else: + raise TypeError("integer required") + elif isinstance(sym, VariableDef): + if sym.type == DataType.BYTE and (word_type or float_type): + raise self.PError("byte value required") + elif sym.type == DataType.WORD and float_type: + raise 
self.PError("word value required") + return ParseResult.MemMappedValue(sym.address, sym.type, sym.length, sym.name) + else: + raise self.PError("invalid symbol type used as lvalue of assignment (3)") + else: + addr = parse_expr_as_int(num_or_name, None, self.sourceref) + if word_type: + return ParseResult.MemMappedValue(addr, DataType.WORD, length=1) + elif float_type: + return ParseResult.MemMappedValue(addr, DataType.FLOAT, length=1) + else: + return ParseResult.MemMappedValue(addr, DataType.BYTE, length=1) + else: + raise self.PError("invalid value '" + text + "'") + + def is_identifier(self, name: str) -> bool: + if name.isidentifier(): + return True + blockname, sep, name = name.partition(".") + if sep: + return blockname.isidentifier() and name.isidentifier() + return False + + def _size_from_arraydecl(self, decl: str) -> int: + return parse_expr_as_int(decl[:-1].split("(")[-1], self.cur_block.symbols, self.sourceref) + + def _size_from_matrixdecl(self, decl: str) -> Tuple[int, int]: + dimensions = decl[:-1].split("(")[-1] + try: + xs, ys = dimensions.split(",") + except ValueError: + raise self.PError("invalid matrix dimensions") + return (parse_expr_as_int(xs, self.cur_block.symbols, self.sourceref), + parse_expr_as_int(ys, self.cur_block.symbols, self.sourceref)) + + def psplit(self, sentence: str, separators: str=" \t", lparen: str="(", rparen: str=")") -> List[str]: + """split a sentence but not on separators within parenthesis""" + nb_brackets = 0 + sentence = sentence.strip(separators) # get rid of leading/trailing seps + indices = [0] + for i, c in enumerate(sentence): + if c == lparen: + nb_brackets += 1 + elif c == rparen: + nb_brackets -= 1 + elif c in separators and nb_brackets == 0: + indices.append(i) + # handle malformed string + if nb_brackets < 0: + raise self.PError("syntax error") + + indices.append(len(sentence)) + # handle missing closing parentheses + if nb_brackets > 0: + raise self.PError("syntax error") + result = 
[sentence[i:j].strip(separators) for i, j in zip(indices, indices[1:])] + return list(filter(None, result)) # remove empty strings + + +class Optimizer: + def __init__(self, parseresult: ParseResult) -> None: + self.parsed = parseresult + + def optimize(self) -> ParseResult: + print("\noptimizing parse tree") + for block in self.parsed.blocks: + self.combine_assignments_into_multi(block) + self.optimize_multiassigns(block) + return self.parsed + + def optimize_multiassigns(self, block: ParseResult.Block) -> None: + # optimize multi-assign statements. + for stmt in block.statements: + if isinstance(stmt, ParseResult.AssignmentStmt) and len(stmt.leftvalues) > 1: + # remove duplicates + lvalues = list(set(stmt.leftvalues)) + if len(lvalues) != len(stmt.leftvalues): + print("{:s}:{:d}: removed duplicate assignment targets".format(block.sourceref.file, stmt.lineno)) + # change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) + stmt.leftvalues = list(sorted(lvalues, key=value_sortkey)) + + def combine_assignments_into_multi(self, block: ParseResult.Block) -> None: + # fold multiple consecutive assignments with the same rvalue into one multi-assignment + statements = [] # type: List[ParseResult._Stmt] + multi_assign_statement = None + for stmt in block.statements: + if isinstance(stmt, ParseResult.AssignmentStmt): + if multi_assign_statement and multi_assign_statement.right == stmt.right: + multi_assign_statement.leftvalues.extend(stmt.leftvalues) + print("{:s}:{:d}: joined with previous line into multi-assign statement".format(block.sourceref.file, stmt.lineno)) + else: + if multi_assign_statement: + statements.append(multi_assign_statement) + multi_assign_statement = stmt + else: + if multi_assign_statement: + statements.append(multi_assign_statement) + multi_assign_statement = None + statements.append(stmt) + if multi_assign_statement: + statements.append(multi_assign_statement) + block.statements = statements + + +def 
value_sortkey(value: ParseResult.Value) -> int: + if isinstance(value, ParseResult.RegisterValue): + num = 0 + for char in value.register: + num *= 100 + num += ord(char) + return num + elif isinstance(value, ParseResult.MemMappedValue): + if value.address < 0x100: + return 10000 + value.address + else: + return 20000 + value.address + else: + return 99999999 + + +if __name__ == "__main__": + p = Parser("readme.txt", outputdir="output") + p.cur_block = ParseResult.Block("test", SourceRef("testfile", 1), None) + p.parse_subx_def("subx SUBNAME (A, test2:XY, X) -> (A?, X) = $c000") + sub = list(p.cur_block.symbols.iter_subroutines())[0] + import pprint + pprint.pprint(vars(sub)) diff --git a/il65/preprocess.py b/il65/preprocess.py new file mode 100644 index 000000000..5cb0be08f --- /dev/null +++ b/il65/preprocess.py @@ -0,0 +1,61 @@ +""" +Intermediate Language for 6502/6510 microprocessors +This is the preprocessing parser of the IL65 code, that only generates a symbol table. + +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +from typing import List, Tuple +from .parse import Parser, ParseResult, SymbolTable, SymbolDefinition + + +class PreprocessingParser(Parser): + def __init__(self, filename: str) -> None: + super().__init__(filename, "", parsing_import=True) + + def preprocess(self) -> Tuple[List[Tuple[int, str]], SymbolTable]: + def cleanup_table(symbols: SymbolTable): + symbols.owning_block = None # not needed here + for name, symbol in list(symbols.symbols.items()): + if isinstance(symbol, SymbolTable): + cleanup_table(symbol) + elif not isinstance(symbol, SymbolDefinition): + del symbols.symbols[name] + self.parse() + cleanup_table(self.root_scope) + return self.lines, self.root_scope + + def load_source(self, filename: str) -> List[Tuple[int, str]]: + lines = super().load_source(filename) + # can do some additional source-level preprocessing here + return lines + + def parse_file(self) -> ParseResult: + 
print("\npreprocessing", self.sourceref.file) + self._parse_1() + return self.result + + def parse_asminclude(self, line: str) -> ParseResult.InlineAsm: + return ParseResult.InlineAsm(self.sourceref.line, []) + + def parse_statement(self, line: str) -> ParseResult._Stmt: + return None # type: ignore + + def parse_var_def(self, line: str) -> None: + super().parse_var_def(line) + + def parse_const_def(self, line: str) -> None: + super().parse_const_def(line) + + def parse_memory_def(self, line: str, is_zeropage: bool=False) -> None: + super().parse_memory_def(line, is_zeropage) + + def parse_label(self, line: str) -> None: + super().parse_label(line) + + def parse_subx_def(self, line: str) -> None: + super().parse_subx_def(line) + + def create_import_parser(self, filename: str, outputdir: str) -> 'Parser': + return PreprocessingParser(filename) diff --git a/il65/symbols.py b/il65/symbols.py new file mode 100644 index 000000000..abae00dfb --- /dev/null +++ b/il65/symbols.py @@ -0,0 +1,638 @@ +""" +Intermediate Language for 6502/6510 microprocessors +Here are the symbol (name) operations such as lookups, datatype definitions. 
+ +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +import inspect +import math +import enum +import builtins +from functools import total_ordering +from typing import Optional, Set, Union, Tuple, Dict, Iterable, Sequence, Any, List + +PrimitiveType = Union[int, float, str] + + +REGISTER_SYMBOLS = {"A", "X", "Y", "AX", "AY", "XY", "SC"} +REGISTER_SYMBOLS_RETURNVALUES = REGISTER_SYMBOLS | {"SZ"} +REGISTER_BYTES = {"A", "X", "Y", "SC"} +REGISTER_WORDS = {"AX", "AY", "XY"} + +# 5-byte cbm MFLPT format limitations: +FLOAT_MAX_POSITIVE = 1.7014118345e+38 +FLOAT_MAX_NEGATIVE = -1.7014118345e+38 + +RESERVED_NAMES = {'true', 'false', 'var', 'memory', 'const', 'asm'} +RESERVED_NAMES |= REGISTER_SYMBOLS + +MATH_SYMBOLS = {name for name in dir(math) if name[0].islower()} +BUILTIN_SYMBOLS = {name for name in dir(builtins) if name[0].islower()} + + +@total_ordering +class DataType(enum.Enum): + """The possible data types of values""" + BYTE = 1 + WORD = 2 + FLOAT = 3 + BYTEARRAY = 4 + WORDARRAY = 5 + MATRIX = 6 + STRING = 7 + STRING_P = 8 + STRING_S = 9 + STRING_PS = 10 + + def assignable_from_value(self, value: PrimitiveType) -> bool: + if isinstance(value, (int, float)): + if self == DataType.BYTE: + return 0 <= value < 0x100 + if self == DataType.WORD: + return 0 <= value < 0x10000 + if self == DataType.FLOAT: + return type(value) in (float, int) + return False + + def __lt__(self, other): + if self.__class__ == other.__class__: + return self.value < other.value + return NotImplemented + + +STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS} + + +class SymbolError(Exception): + pass + + +_identifier_seq_nr = 0 + + +class SourceRef: + __slots__ = ("file", "line", "column") + + def __init__(self, file: str, line: int, column: int=0) -> None: + self.file = file + self.line = line + self.column = column + + def __str__(self) -> str: + if self.column: + return "{:s}:{:d}:{:d}".format(self.file, 
self.line, self.column) + if self.line: + return "{:s}:{:d}".format(self.file, self.line) + return self.file + + def copy(self) -> 'SourceRef': + return SourceRef(self.file, self.line, self.column) + + +class SymbolDefinition: + def __init__(self, blockname: str, name: str, sourceref: SourceRef, allocate: bool) -> None: + self.blockname = blockname + self.name = name + self.sourceref = sourceref + self.allocate = allocate # set to false if the variable is memory mapped (or a constant) instead of allocated + global _identifier_seq_nr + self.seq_nr = _identifier_seq_nr + _identifier_seq_nr += 1 + + def __lt__(self, other: 'SymbolDefinition') -> bool: + if not isinstance(other, SymbolDefinition): + return NotImplemented + return (self.blockname, self.name, self.seq_nr) < (other.blockname, other.name, self.seq_nr) + + def __str__(self): + return "<{:s} {:s}.{:s}>".format(self.__class__.__name__, self.blockname, self.name) + + +class LabelDef(SymbolDefinition): + pass + + +class VariableDef(SymbolDefinition): + # if address is None, it's a dynamically allocated variable. + # if address is not None, it's a memory mapped variable (=memory address referenced by a name). 
+ def __init__(self, blockname: str, name: str, sourceref: SourceRef, + datatype: DataType, allocate: bool, *, + value: PrimitiveType, length: int, address: Optional[int]=None, + register: str=None, matrixsize: Tuple[int, int]=None) -> None: + super().__init__(blockname, name, sourceref, allocate) + self.type = datatype + self.address = address + self.length = length + self.value = value + self.register = register + self.matrixsize = matrixsize + + @property + def is_memmap(self): + return self.address is not None + + def __repr__(self): + return ""\ + .format(self.blockname, self.name, self.type, str(self.address), str(self.length), str(self.value)) + + def __lt__(self, other: 'SymbolDefinition') -> bool: + if not isinstance(other, VariableDef): + return NotImplemented + v1 = (self.blockname, self.name or "", self.address or 0, self.seq_nr) + v2 = (other.blockname, other.name or "", other.address or 0, self.seq_nr) + return v1 < v2 + + +class ConstantDef(SymbolDefinition): + def __init__(self, blockname: str, name: str, sourceref: SourceRef, datatype: DataType, *, + value: PrimitiveType, length: int) -> None: + super().__init__(blockname, name, sourceref, False) + self.type = datatype + self.length = length + self.value = value + + def __repr__(self): + return ""\ + .format(self.blockname, self.name, self.type, str(self.length), str(self.value)) + + def __lt__(self, other: 'SymbolDefinition') -> bool: + if not isinstance(other, ConstantDef): + return NotImplemented + v1 = (str(self.value) or "", self.blockname, self.name or "", self.seq_nr) + v2 = (str(other.value) or "", other.blockname, other.name or "", self.seq_nr) + return v1 < v2 + + +class SubroutineDef(SymbolDefinition): + def __init__(self, blockname: str, name: str, sourceref: SourceRef, + parameters: Sequence[Tuple[str, str]], returnvalues: Set[str], address: Optional[int]=None) -> None: + super().__init__(blockname, name, sourceref, False) + self.address = address + self.parameters = parameters + 
self.input_registers = set() # type: Set[str] + self.return_registers = set() # type: Set[str] + self.clobbered_registers = set() # type: Set[str] + for _, param in parameters: + if param in REGISTER_BYTES: + self.input_registers.add(param) + elif param in REGISTER_WORDS: + self.input_registers.add(param[0]) + self.input_registers.add(param[1]) + else: + raise SymbolError("invalid parameter spec: " + param) + for register in returnvalues: + if register in REGISTER_SYMBOLS_RETURNVALUES: + self.return_registers.add(register) + elif register[-1] == "?": + for r in register[:-1]: + if r not in REGISTER_SYMBOLS_RETURNVALUES: + raise SymbolError("invalid return value spec: " + r) + self.clobbered_registers.add(r) + else: + raise SymbolError("invalid return value spec: " + register) + + +class Zeropage: + SCRATCH_B1 = 0x02 + SCRATCH_B2 = 0x03 + + def __init__(self) -> None: + self.unused_bytes = [] # type: List[int] + self.unused_words = [] # type: List[int] + + def configure(self, clobber_zp: bool = False) -> None: + if clobber_zp: + self.unused_bytes = list(range(0x04, 0x80)) + self.unused_words = list(range(0x80, 0x100, 2)) + else: + # these are valid for the C-64: + # ($02 and $03 are reserved as scratch addresses for various routines) + self.unused_bytes = [0x06, 0x0a, 0x2a, 0x52, 0x93] # 5 zp variables (8 bits each) + self.unused_words = [0x04, 0xf7, 0xf9, 0xfb, 0xfd] # 5 zp variables (16 bits each) + assert self.SCRATCH_B1 not in self.unused_bytes and self.SCRATCH_B1 not in self.unused_words + assert self.SCRATCH_B2 not in self.unused_bytes and self.SCRATCH_B2 not in self.unused_words + + def get_unused_byte(self): + return self.unused_bytes.pop() + + def get_unused_word(self): + return self.unused_words.pop() + + @property + def available_byte_vars(self) -> int: + return len(self.unused_bytes) + + @property + def available_word_vars(self) -> int: + return len(self.unused_words) + + +# the single, global Zeropage object +zeropage = Zeropage() + + +class 
SymbolTable: + + def __init__(self, name: str, parent: Optional['SymbolTable'], owning_block: Any) -> None: + self.name = name + self.symbols = {} # type: Dict[str, Union[SymbolDefinition, SymbolTable]] + self.parent = parent + self.owning_block = owning_block + self.eval_dict = None + + def __iter__(self): + yield from self.symbols.values() + + def __getitem__(self, symbolname: str) -> Union[SymbolDefinition, 'SymbolTable']: + return self.symbols[symbolname] + + def __contains__(self, symbolname: str) -> bool: + return symbolname in self.symbols + + def lookup(self, dottedname: str, include_builtin_names: bool=False) -> Tuple['SymbolTable', Union[SymbolDefinition, 'SymbolTable']]: + nameparts = dottedname.split('.') + if len(nameparts) == 1: + try: + return self, self.symbols[nameparts[0]] + except LookupError: + if include_builtin_names: + if nameparts[0] in MATH_SYMBOLS: + return self, getattr(math, nameparts[0]) + elif nameparts[0] in BUILTIN_SYMBOLS: + return self, getattr(builtins, nameparts[0]) + raise SymbolError("undefined symbol '{:s}'".format(nameparts[0])) + # start from toplevel namespace: + scope = self + while scope.parent: + scope = scope.parent + for namepart in nameparts[:-1]: + try: + scope = scope.symbols[namepart] # type: ignore + assert scope.name == namepart + except LookupError: + raise SymbolError("undefined block '{:s}'".format(namepart)) + if isinstance(scope, SymbolTable): + return scope.lookup(nameparts[-1]) + else: + raise SymbolError("invalid block name '{:s}' in dotted name".format(namepart)) + + def get_address(self, name: str) -> int: + scope, symbol = self.lookup(name) + if isinstance(symbol, ConstantDef): + raise SymbolError("cannot take the address of a constant") + if not symbol or not isinstance(symbol, VariableDef): + raise SymbolError("no var or const defined by that name") + if symbol.address is None: + raise SymbolError("can only take address of memory mapped variables") + return symbol.address + + def as_eval_dict(self) 
-> Dict[str, Any]: + # return a dictionary suitable to be passed as locals or globals to eval() + if self.eval_dict is None: + d = Eval_symbol_dict(self) + self.eval_dict = d # type: ignore + return self.eval_dict + + def iter_variables(self) -> Iterable[VariableDef]: + yield from sorted((v for v in self.symbols.values() if isinstance(v, VariableDef))) + + def iter_constants(self) -> Iterable[ConstantDef]: + yield from sorted((v for v in self.symbols.values() if isinstance(v, ConstantDef))) + + def iter_subroutines(self) -> Iterable[SubroutineDef]: + yield from sorted((v for v in self.symbols.values() if isinstance(v, SubroutineDef))) + + def iter_labels(self) -> Iterable[LabelDef]: + yield from sorted((v for v in self.symbols.values() if isinstance(v, LabelDef))) + + def check_identifier_valid(self, name: str, sourceref: SourceRef) -> None: + if not name.isidentifier(): + raise SymbolError("invalid identifier") + identifier = self.symbols.get(name, None) + if identifier: + if isinstance(identifier, SymbolDefinition): + raise SymbolError("identifier was already defined at " + str(identifier.sourceref)) + raise SymbolError("identifier already defined as " + str(type(identifier))) + if name in MATH_SYMBOLS: + print("warning: {}: identifier shadows a name from the math module".format(sourceref)) + elif name in BUILTIN_SYMBOLS: + print("warning: {}: identifier shadows a builtin name".format(sourceref)) + + def define_variable(self, name: str, sourceref: SourceRef, datatype: DataType, *, + address: int=None, length: int=0, value: PrimitiveType=0, + matrixsize: Tuple[int, int]=None, register: str=None) -> None: + # this defines a new variable and also checks if the prefill value is allowed for the variable type. 
+ assert value is not None + self.check_identifier_valid(name, sourceref) + range_error = check_value_in_range(datatype, register, length, value) + if range_error: + raise ValueError(range_error) + if type(value) in (int, float): + _, value = coerce_value(sourceref, datatype, value) # type: ignore + allocate = address is None + if datatype == DataType.BYTE: + if allocate and self.name == "ZP": + try: + address = zeropage.get_unused_byte() + except LookupError: + raise SymbolError("too many global 8-bit variables in ZP") + self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.BYTE, allocate, + value=value, length=1, address=address) + elif datatype == DataType.WORD: + if allocate and self.name == "ZP": + try: + address = zeropage.get_unused_word() + except LookupError: + raise SymbolError("too many global 16-bit variables in ZP") + self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.WORD, allocate, + value=value, length=1, address=address) + elif datatype == DataType.FLOAT: + if allocate and self.name == "ZP": + raise SymbolError("floats cannot be stored in the ZP") + self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.FLOAT, allocate, + value=value, length=1, address=address) + elif datatype == DataType.BYTEARRAY: + self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.BYTEARRAY, allocate, + value=value, length=length, address=address) + elif datatype == DataType.WORDARRAY: + self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.WORDARRAY, allocate, + value=value, length=length, address=address) + elif datatype in (DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS): + self.symbols[name] = VariableDef(self.name, name, sourceref, datatype, True, + value=value, length=len(value)) # type: ignore + elif datatype == DataType.MATRIX: + assert isinstance(matrixsize, tuple) + length = matrixsize[0] * matrixsize[1] + self.symbols[name] = VariableDef(self.name, name, 
sourceref, DataType.MATRIX, allocate, + value=value, length=length, address=address, matrixsize=matrixsize) + else: + raise ValueError("unknown type " + str(datatype)) + self.eval_dict = None + + def define_sub(self, name: str, sourceref: SourceRef, + parameters: Sequence[Tuple[str, str]], returnvalues: Set[str], address: Optional[int]) -> None: + self.check_identifier_valid(name, sourceref) + self.symbols[name] = SubroutineDef(self.name, name, sourceref, parameters, returnvalues, address) + + def define_label(self, name: str, sourceref: SourceRef) -> None: + self.check_identifier_valid(name, sourceref) + self.symbols[name] = LabelDef(self.name, name, sourceref, False) + + def define_scope(self, scope: 'SymbolTable', sourceref: SourceRef) -> None: + self.check_identifier_valid(scope.name, sourceref) + self.symbols[scope.name] = scope + + def define_constant(self, name: str, sourceref: SourceRef, datatype: DataType, *, + length: int=0, value: PrimitiveType=0) -> None: + # this defines a new constant and also checks if the value is allowed for the data type. 
+ assert value is not None + self.check_identifier_valid(name, sourceref) + if type(value) in (int, float): + _, value = coerce_value(sourceref, datatype, value) # type: ignore + range_error = check_value_in_range(datatype, "", length, value) + if range_error: + raise ValueError(range_error) + if datatype in (DataType.BYTE, DataType.WORD, DataType.FLOAT): + self.symbols[name] = ConstantDef(self.name, name, sourceref, datatype, value=value, length=length or 1) + elif datatype in STRING_DATATYPES: + strlen = len(value) # type: ignore + self.symbols[name] = ConstantDef(self.name, name, sourceref, datatype, value=value, length=strlen) + else: + raise ValueError("invalid data type for constant: " + str(datatype)) + self.eval_dict = None + + def merge_roots(self, other_root: 'SymbolTable') -> None: + for name, thing in other_root.symbols.items(): + if isinstance(thing, SymbolTable): + self.define_scope(thing, thing.owning_block.sourceref) + + def print_table(self, summary_only: bool=False) -> None: + if summary_only: + def count_symbols(symbols: 'SymbolTable') -> int: + count = 0 + for s in symbols.symbols.values(): + if isinstance(s, SymbolTable): + count += count_symbols(s) + else: + count += 1 + return count + print("number of symbols:", count_symbols(self)) + else: + def print_symbols(symbols: 'SymbolTable', level: int) -> None: + indent = '\t' * level + print("\n" + indent + "BLOCK:", symbols.name) + for name, s in sorted(symbols.symbols.items(), key=lambda x: getattr(x[1], "sourceref", ("", 0))): + if isinstance(s, SymbolTable): + print_symbols(s, level + 1) + elif isinstance(s, SubroutineDef): + print(indent * 2 + "SUB: " + s.name, s.sourceref, sep="\t") + elif isinstance(s, LabelDef): + print(indent * 2 + "LABEL: " + s.name, s.sourceref, sep="\t") + elif isinstance(s, VariableDef): + print(indent * 2 + "VAR: " + s.name, s.sourceref, s.type, sep="\t") + elif isinstance(s, ConstantDef): + print(indent * 2 + "CONST: " + s.name, s.sourceref, s.type, sep="\t") + else: 
+ raise TypeError("invalid symbol def type", s) + print("\nSymbols defined in the symbol table:") + print("------------------------------------") + print_symbols(self, 0) + print() + + +class Eval_symbol_dict(dict): + def __init__(self, symboltable: SymbolTable, constants: bool=True) -> None: + super().__init__() + self._symboltable = symboltable + self._constants = constants + + def __getattr__(self, name): + return self.__getitem__(name) + + def __getitem__(self, name): + if name[0] != '_' and name in builtins.__dict__: + return builtins.__dict__[name] + try: + scope, symbol = self._symboltable.lookup(name) + except (LookupError, SymbolError): + # attempt lookup from global scope + global_scope = self._symboltable + while global_scope.parent: + global_scope = global_scope.parent + scope, symbol = global_scope.lookup(name, True) + if self._constants: + if isinstance(symbol, ConstantDef): + return symbol.value + elif isinstance(symbol, VariableDef): + return symbol.value + elif inspect.isbuiltin(symbol): + return symbol + elif isinstance(symbol, SymbolTable): + return symbol.as_eval_dict() + else: + raise SymbolError("invalid datatype referenced" + repr(symbol)) + else: + raise SymbolError("no support for non-constant expression evaluation yet") + + +def coerce_value(sourceref: SourceRef, datatype: DataType, value: PrimitiveType) -> Tuple[bool, PrimitiveType]: + # if we're a BYTE type, and the value is a single character, convert it to the numeric value + if datatype in (DataType.BYTE, DataType.BYTEARRAY, DataType.MATRIX) and isinstance(value, str): + if len(value) == 1: + return True, char_to_bytevalue(value) + # if we're an integer value and the passed value is float, truncate it (and give a warning) + if datatype in (DataType.BYTE, DataType.WORD, DataType.MATRIX) and type(value) is float: + frac = math.modf(value) # type:ignore + if frac != 0: + print("warning: {}: Float value truncated.".format(sourceref)) + return True, int(value) + return False, value + + 
def check_value_in_range(datatype: DataType, register: str, length: int, value: PrimitiveType) -> Optional[str]:
    """Check that *value* fits the storage selected by *register* or *datatype*.

    When *register* is non-empty it alone decides the allowed range and
    *datatype* is only consulted for the ``None`` exemption of word registers.
    Otherwise the range follows from *datatype*.

    Args:
        datatype: the declared data type of the symbol being defined.
        register: register name the value is bound to, or an empty string.
        length: declared length; not used for checking, only included in the
            SymbolError raised for an unknown data type.
        value: the value to validate.

    Returns:
        None when the value is acceptable, otherwise a human readable
        error message describing the problem.

    Raises:
        SymbolError: if *datatype* is not one of the known data types.
    """
    def unsigned_range_error(maximum: int, message: str) -> Optional[str]:
        # Ordering comparisons such as ``"abc" < 0`` or ``None < 0`` raise
        # TypeError; report those values as a normal diagnostic instead so
        # the caller gets an error message rather than a crash.
        if not isinstance(value, (int, float)):
            return "value must be a number"
        if value < 0 or value > maximum:
            return message
        return None

    if register:
        if register in REGISTER_BYTES:
            return unsigned_range_error(
                0xff, "value out of range, must be (unsigned) byte for a single register")
        if register in REGISTER_WORDS:
            # a missing value is tolerated for plain byte/word types bound to a register pair
            if value is None and datatype in (DataType.BYTE, DataType.WORD):
                return None
            return unsigned_range_error(
                0xffff, "value out of range, must be (unsigned) word for 2 combined registers")
        return "strange register"

    if datatype in (DataType.BYTE, DataType.BYTEARRAY, DataType.MATRIX):
        # only the scalar BYTE type may be left without a value
        if value is None and datatype == DataType.BYTE:
            return None
        return unsigned_range_error(0xff, "value out of range, must be (unsigned) byte")

    if datatype in (DataType.WORD, DataType.WORDARRAY):
        # only the scalar WORD type may be left without a value
        if value is None and datatype == DataType.WORD:
            return None
        return unsigned_range_error(0xffff, "value out of range, must be (unsigned) word")

    if datatype in STRING_DATATYPES:
        # exact type match on purpose: subclasses of str are not accepted
        return None if type(value) is str else "value must be a string"

    if datatype == DataType.FLOAT:
        # exact type match on purpose: bool (an int subclass) is not accepted
        return None if type(value) in (int, float) else "value must be a number"

    raise SymbolError("missing value check for type", datatype, register, length, value)
+ + +def char_to_bytevalue(character: str, petscii: bool=True) -> int: + assert len(character) == 1 + if petscii: + return ord(character.translate(ascii_to_petscii_trans)) + else: + raise NotImplementedError("screencode conversion not yet implemented for chars") + + +# ASCII/UNICODE-to-PETSCII translation table +# Unicode symbols supported that map to a PETSCII character: £ ↑ ← ♠ ♥ ♦ ♣ π ● ○ and various others +ascii_to_petscii_trans = str.maketrans({ + '\f': 147, # form feed becomes ClearScreen + '\n': 13, # line feed becomes a RETURN + '\r': 17, # CR becomes CursorDown + 'a': 65, + 'b': 66, + 'c': 67, + 'd': 68, + 'e': 69, + 'f': 70, + 'g': 71, + 'h': 72, + 'i': 73, + 'j': 74, + 'k': 75, + 'l': 76, + 'm': 77, + 'n': 78, + 'o': 79, + 'p': 80, + 'q': 81, + 'r': 82, + 's': 83, + 't': 84, + 'u': 85, + 'v': 86, + 'w': 87, + 'x': 88, + 'y': 89, + 'z': 90, + 'A': 97, + 'B': 98, + 'C': 99, + 'D': 100, + 'E': 101, + 'F': 102, + 'G': 103, + 'H': 104, + 'I': 105, + 'J': 106, + 'K': 107, + 'L': 108, + 'M': 109, + 'N': 110, + 'O': 111, + 'P': 112, + 'Q': 113, + 'R': 114, + 'S': 115, + 'T': 116, + 'U': 117, + 'V': 118, + 'W': 119, + 'X': 120, + 'Y': 121, + 'Z': 122, + '{': 179, # left squiggle + '}': 235, # right squiggle + '£': 92, # pound currency sign + '^': 94, # up arrow + '~': 126, # pi math symbol + 'π': 126, # pi symbol + '`': 39, # single quote + '✓': 250, # check mark + + '|': 221, # vertical bar + '│': 221, # vertical bar + '─': 96, # horizontal bar + '┼': 123, # vertical and horizontal bar + + '↑': 94, # up arrow + '←': 95, # left arrow + + '▔': 163, # upper bar + '_': 164, # lower bar (underscore) + '▁': 164, # lower bar + '▎': 165, # left bar + + '♠': 97, # spades + '●': 113, # circle + '♥': 115, # hearts + '○': 119, # open circle + '♣': 120, # clubs + '♦': 122, # diamonds + + '├': 171, # vertical and right + '┤': 179, # vertical and left + '┴': 177, # horiz and up + '┬': 178, # horiz and down + '└': 173, # up right + '┐': 174, # down left + '┌': 175, # down 
right + '┘': 189, # up left + '▗': 172, # block lr + '▖': 187, # block ll + '▝': 188, # block ur + '▘': 190, # block ul + '▚': 191, # block ul and lr + '▌': 161, # left half + '▄': 162, # lower half + '▒': 230, # raster +}) diff --git a/lib/c64lib.ill b/lib/c64lib.ill new file mode 100644 index 000000000..0cdf64506 --- /dev/null +++ b/lib/c64lib.ill @@ -0,0 +1,549 @@ +; IL65 definitions for the Commodore-64 +; Including memory registers, I/O registers, Basic and Kernel subroutines, utility subroutines. +; +; Written by Irmen de Jong (irmen@razorvine.net) +; License: GNU GPL 3.0, see LICENSE + +output raw + +~ c64 { + memory SCRATCH_ZP1 = $02 ; scratch register #1 in ZP + memory SCRATCH_ZP2 = $03 ; scratch register #2 in ZP + + memory COLOR = $286 ; cursor color + +; ---- VIC-II registers ---- + + memory SP0X = $d000 + memory SP0Y = $d001 + memory SP1X = $d002 + memory SP1Y = $d003 + memory SP2X = $d004 + memory SP2Y = $d005 + memory SP3X = $d006 + memory SP3Y = $d007 + memory SP4X = $d008 + memory SP4Y = $d009 + memory SP5X = $d00a + memory SP5Y = $d00b + memory SP6X = $d00c + memory SP6Y = $d00d + memory SP7X = $d00e + memory SP7Y = $d00f + + memory MSIGX = $d010 + memory SCROLY = $d011 + memory RASTER = $d012 + memory LPENX = $d013 + memory LPENY = $d014 + memory SPENA = $d015 + memory SCROLX = $d016 + memory YXPAND = $d017 + memory VMCSB = $d018 + memory VICIRQ = $d019 + memory IREQMASK = $d01a + memory SPBGPR = $d01b + memory SPMC = $d01c + memory XXPAND = $d01d + memory SPSPCL = $d01e + memory SPBGCL = $d01f + + memory EXTCOL = $d020 ; border color + memory BGCOL0 = $d021 ; screen color + memory BGCOL1 = $d022 + memory BGCOL2 = $d023 + memory BGCOL4 = $d024 + memory SPMC0 = $d025 + memory SPMC1 = $d026 + memory SP0COL = $d027 + memory SP1COL = $d028 + memory SP2COL = $d029 + memory SP3COL = $d02a + memory SP4COL = $d02b + memory SP5COL = $d02c + memory SP6COL = $d02d + memory SP7COL = $d02e + +; ---- end of VIC-II registers ---- + +; ---- C64 basic and kernal 
ROM float constants and functions ---- + + ; note: the fac1 and fac2 are working registers and take 6 bytes each, + ; floats in memory (and rom) are stored in 5-byte MFLPT packed format. + + ; constants in five-byte "mflpt" format in the BASIC ROM + memory .float FL_PIVAL = $aea8 ; 3.1415926... + memory .float FL_N32768 = $b1a5 ; -32768 + memory .float FL_FONE = $b9bc ; 1 + memory .float FL_SQRHLF = $b9d6 ; SQR(2) / 2 + memory .float FL_SQRTWO = $b9db ; SQR(2) + memory .float FL_NEGHLF = $b9e0 ; -.5 + memory .float FL_LOG2 = $b9e5 ; LOG(2) + memory .float FL_TENC = $baf9 ; 10 + memory .float FL_NZMIL = $bdbd ; 1e9 (1 billion) + memory .float FL_FHALF = $bf11 ; .5 + memory .float FL_LOGEB2 = $bfbf ; 1 / LOG(2) + memory .float FL_PIHALF = $e2e0 ; PI / 2 + memory .float FL_TWOPI = $e2e5 ; 2 * PI + memory .float FL_FR4 = $e2ea ; .25 + + +; @todo verify clobbered registers? +; note: fac1/2 might get clobbered even if not mentioned in the function's name. +; note: for subtraction and division, the left operand is in fac2, the right operand in fac1. + +; checked functions below: +subx MOVFM (mflpt: AY) -> (A?, Y?) = $bba2 ; load mflpt value from memory in A/Y into fac1 +subx FREADMEM () -> (A?, Y?) = $bba6 ; load mflpt value from memory in $22/$23 into fac1 +subx CONUPK (mflpt: AY) -> (A?, Y?) = $ba8c ; load mflpt value from memory in A/Y into fac2 +subx FAREADMEM () -> (A?, Y?) = $ba90 ; load mflpt value from memory in $22/$23 into fac2 +subx MOVFA () -> (A?, X?) = $bbfc ; copy fac2 to fac1 +subx MOVAF () -> (A?, X?) = $bc0c ; copy fac1 to fac2 (rounded) +subx MOVEF () -> (A?, X?) = $bc0f ; copy fac1 to fac2 +subx FTOMEMXY (mflpt: XY) -> (A?, Y?) = $bbd4 ; store fac1 to memory X/Y as 5-byte mflpt +subx FTOSWORDYA () -> (Y, A, X?) = $b1aa ; fac1-> signed word in Y/A (might throw ILLEGAL QUANTITY) + ; use c64util.FTOSWRDAY to get A/Y output (lo/hi switched to normal order) +subx GETADR () -> (Y, A, X?) 
= $b7f7 ; fac1 -> unsigned word in Y/A (might throw ILLEGAL QUANTITY) + ; (result also in $14/15) use c64util.GETADRAY to get A/Y output (lo/hi switched to normal order) +subx QINT () -> (A?, X?, Y?) = $bc9b ; fac1 -> 4-byte signed integer in 98-101 ($62-$65), with the MSB FIRST. +subx AYINT () -> (A?, X?, Y?) = $b1bf ; fac1-> signed word in 100-101 ($64-$65) MSB FIRST. (might throw ILLEGAL QUANTITY) +subx GIVAYF (lo: Y, hi: A) -> (A?, X?, Y?) = $b391 ; signed word in Y/A -> float in fac1 + ; use c64util.GIVAYFAY to use A/Y input (lo/hi switched to normal order) +; there is also c64util.GIVUAYF - unsigned word in A/Y (lo/hi) to fac1 +; there is also c64util.FREADS32 that reads from 98-101 ($62-$65) MSB FIRST +; there is also c64util.FREADUS32 that reads from 98-101 ($62-$65) MSB FIRST +; there is also c64util.FREADS24AXY that reads signed int24 into fac1 from A/X/Y (lo/mid/hi bytes) +subx FREADUY (ubyte: Y) -> (A?, X?, Y?) = $b3a2 ; 8 bit unsigned Y -> float in fac1 +subx FREADSA (sbyte: A) -> (A?, X?, Y?) = $bc3c ; 8 bit signed A -> float in fac1 +subx FREADSTR (len: A) -> (A?, X?, Y?) = $b7b5 ; str -> fac1, $22/23 must point to string, A=string length +subx FPRINTLN () -> (A?, X?, Y?) = $aabc ; print string of fac1, on one line (= with newline) +subx FOUT () -> (AY, X?) = $bddd ; fac1 -> string, address returned in AY ($0100) + +subx FADDH () -> (A?, X?, Y?) = $b849 ; fac1 += 0.5, for rounding- call this before INT +subx MUL10 () -> (A?, X?, Y?) = $bae2 ; fac1 *= 10 +subx DIV10 () -> (A?, X?, Y?) = $bafe ; fac1 /= 10 , CAUTION: result is always positive! +subx FCOMP (mflpt: AY) -> (A, X?, Y?) = $bc5b ; A = compare fac1 to mflpt in A/Y, 0=equal 1=fac1 is greater, 255=fac1 is less than + +subx FADDT () -> (A?, X?, Y?) = $b86a ; fac1 += fac2 +subx FADD (mflpt: AY) -> (A?, X?, Y?) = $b867 ; fac1 += mflpt value from A/Y +subx FSUBT () -> (A?, X?, Y?) = $b853 ; fac1 = fac2-fac1 mind the order of the operands +subx FSUB (mflpt: AY) -> (A?, X?, Y?) 
= $b850 ; fac1 = mflpt from A/Y - fac1 +subx FMULTT () -> (A?, X?, Y?) = $ba2b ; fac1 *= fac2 +subx FMULT (mflpt: AY) -> (A?, X?, Y?) = $ba28 ; fac1 *= mflpt value from A/Y +subx FDIVT () -> (A?, X?, Y?) = $bb12 ; fac1 = fac2/fac1 mind the order of the operands +subx FDIV (mflpt: AY) -> (A?, X?, Y?) = $bb0f ; fac1 = mflpt in A/Y / fac1 +subx FPWRT () -> (A?, X?, Y?) = $bf7b ; fac1 = fac2 ** fac1 +subx FPWR (mflpt: AY) -> (A?, X?, Y?) = $bf78 ; fac1 = fac2 ** mflpt from A/Y + +subx NOTOP () -> (A?, X?, Y?) = $aed4 ; fac1 = NOT(fac1) +subx INT () -> (A?, X?, Y?) = $bccc ; INT() truncates, use FADDH first to round instead of trunc +subx LOG () -> (A?, X?, Y?) = $b9ea ; fac1 = LN(fac1) (natural log) +subx SGN () -> (A?, X?, Y?) = $bc39 ; fac1 = SGN(fac1), result of SIGN (-1, 0 or 1) +subx SIGN () -> (A) = $bc2b ; SIGN(fac1) to A, $ff, $0, $1 for negative, zero, positive +subx ABS () -> () = $bc58 ; fac1 = ABS(fac1) +subx SQR () -> (A?, X?, Y?) = $bf71 ; fac1 = SQRT(fac1) +subx EXP () -> (A?, X?, Y?) = $bfed ; fac1 = EXP(fac1) (e ** fac1) +subx NEGOP () -> (A?) = $bfb4 ; switch the sign of fac1 +subx RND () -> (A?, X?, Y?) = $e097 ; fac1 = RND() +subx COS () -> (A?, X?, Y?) = $e264 ; fac1 = COS(fac1) +subx SIN () -> (A?, X?, Y?) = $e26b ; fac1 = SIN(fac1) +subx TAN () -> (A?, X?, Y?) = $e2b4 ; fac1 = TAN(fac1) +subx ATN () -> (A?, X?, Y?) = $e30e ; fac1 = ATN(fac1) + + +; ---- C64 basic routines ---- + +subx CLEARSCR () -> (A?, X?, Y?) = $E544 ; clear the screen +subx HOMECRSR () -> (A?, X?, Y?) = $E566 ; cursor to top left of screen + + +; ---- end of C64 basic routines ---- + + + +; ---- C64 kernal routines ---- + +subx CINT () -> (A?, X?, Y?) = $FF81 ; (alias: SCINIT) initialize screen editor and video chip +subx IOINIT () -> (A?, X?) = $FF84 ; initialize I/O devices +subx RAMTAS () -> (A?, X?, Y?) = $FF87 ; initialize RAM, tape buffer, screen +subx RESTOR () -> () = $FF8A ; restore default I/O vectors +subx VECTOR (dir: SC, userptr: XY) -> (A?, Y?) 
= $FF8D ; read/set I/O vector table +subx SETMSG (value: A) -> () = $FF90 ; set Kernal message control flag +subx SECOND (address: A) -> (A?) = $FF93 ; (alias: LSTNSA) send secondary address after LISTEN +subx TKSA (address: A) -> (A?) = $FF96 ; (alias: TALKSA) send secondary address after TALK +subx MEMTOP (dir: SC, address: XY) -> (XY) = $FF99 ; read/set top of memory pointer +subx MEMBOT (dir: SC, address: XY) -> (XY) = $FF9C ; read/set bottom of memory pointer +subx SCNKEY () -> (A?, X?, Y?) = $FF9F ; scan the keyboard +subx SETTMO (timeout: A) -> () = $FFA2 ; set time-out flag for IEEE bus +subx ACPTR () -> (A) = $FFA5 ; (alias: IECIN) input byte from serial bus +subx CIOUT (byte: A) -> () = $FFA8 ; (alias: IECOUT) output byte to serial bus +subx UNTLK () -> (A?) = $FFAB ; command serial bus device to UNTALK +subx UNLSN () -> (A?) = $FFAE ; command serial bus device to UNLISTEN +subx LISTEN (device: A) -> (A?) = $FFB1 ; command serial bus device to LISTEN +subx TALK (device: A) -> (A?) = $FFB4 ; command serial bus device to TALK +subx READST () -> (A) = $FFB7 ; read I/O status word +subx SETLFS (logical: A, device: X, address: Y) -> () = $FFBA ; set logical file parameters +subx SETNAM (namelen: A, filename: XY) -> () = $FFBD ; set filename parameters +subx OPEN () -> (A?, X?, Y?) = $FFC0 ; (via 794 ($31A)) open a logical file +subx CLOSE (logical: A) -> (A?, X?, Y?) = $FFC3 ; (via 796 ($31C)) close a logical file +subx CHKIN (logical: X) -> (A?, X?) = $FFC6 ; (via 798 ($31E)) define an input channel +subx CHKOUT (logical: X) -> (A?, X?) = $FFC9 ; (via 800 ($320)) define an output channel +subx CLRCHN () -> (A?, X?) = $FFCC ; (via 802 ($322)) restore default devices +subx CHRIN () -> (A, Y?) 
= $FFCF ; (via 804 ($324)) input a character +subx CHROUT (char: A) -> () = $FFD2 ; (via 806 ($326)) output a character +subx LOAD (verify: A, address: XY) -> (SC, A, X, Y) = $FFD5 ; (via 816 ($330)) load from device +subx SAVE (zp_startaddr: A, endaddr: XY) -> (SC, A) = $FFD8 ; (via 818 ($332)) save to a device +subx SETTIM (low: A, middle: X, high: Y) -> () = $FFDB ; set the software clock +subx RDTIM () -> (A, X, Y) = $FFDE ; read the software clock +subx STOP () -> (SZ, SC, A?, X?) = $FFE1 ; (via 808 ($328)) check the STOP key +subx GETIN () -> (A, X?, Y?) = $FFE4 ; (via 810 ($32A)) get a character +subx CLALL () -> (A?, X?) = $FFE7 ; (via 812 ($32C)) close all files +subx UDTIM () -> (A?, X?) = $FFEA ; update the software clock +subx SCREEN () -> (X, Y) = $FFED ; read number of screen rows and columns +subx PLOT (dir: SC, col: X, row: Y) -> (X, Y) = $FFF0 ; read/set position of cursor on screen +subx IOBASE () -> (X, Y) = $FFF3 ; read base address of I/O devices + +; ---- end of C64 kernal routines ---- + + memory .word NMI_VEC = $FFFA + memory .word RESET_VEC = $FFFC + memory .word IRQ_VEC = $FFFE + +} + +~ c64util { + +; @todo use user-defined subroutines here to have param definitions + +; ---- fac1 = signed int32 from $62-$65 big endian (MSB FIRST) +FREADS32 ; () -> (A?, X?, Y?) + asm { + lda $62 + eor #$ff + asl a + lda #0 + ldx #$a0 + jmp $bc4f + } + +; ---- fac1 = uint32 from $62-$65 big endian (MSB FIRST) +FREADUS32 ; () -> (A?, X?, Y?) + asm { + sec + lda #0 + ldx #$a0 + jmp $bc4f + } + +; ---- fac1 = signed int24 (A/X/Y contain lo/mid/hi bytes) +; note: there is no FREADU24AXY (unsigned), use FREADUS32 instead. +FREADS24AXY ; (lo: A, mid: X, hi: Y) -> (A?, X?, Y?) + asm { + sty $62 + stx $63 + sta $64 + lda $62 + eor #$FF + asl a + lda #0 + sta $65 + ldx #$98 + jmp $bc4f + } + + +; ---- unsigned 16 bit word in A/Y (lo/hi) to fac1 +GIVUAYF ; (uword: AY) -> (A?, X?, Y?) 
+ asm { + sty $62 + sta $63 + ldx #$90 + sec + jmp $bc49 + } + +; ---- signed 16 bit word in A/Y (lo/hi) to float in fac1 +GIVAYFAY ; (sword: AY) -> (A?, X?, Y?) + asm { + sta c64.SCRATCH_ZP1 + tya + ldy c64.SCRATCH_ZP1 + jmp c64.GIVAYF ; this uses the inverse order, Y/A + } + +; ---- fac1 to signed word in A/Y +FTOSWRDAY ; () -> (A, Y, X?) + asm { + jsr c64.FTOSWORDYA ; note the inverse Y/A order + sta c64.SCRATCH_ZP1 + tya + ldy c64.SCRATCH_ZP1 + rts + } + +; ---- fac1 to unsigned word in A/Y +GETADRAY ; () -> (A, Y, X?) + asm { + jsr c64.GETADR ; this uses the inverse order, Y/A + sta c64.SCRATCH_ZP1 + tya + ldy c64.SCRATCH_ZP1 + rts + } + + +; ---- print null terminated string from X/Y +print_string ; (address: XY) -> (A?, Y?) + asm { + stx c64.SCRATCH_ZP1 + sty c64.SCRATCH_ZP2 + ldy #0 +- lda (c64.SCRATCH_ZP1),y + beq + + jsr c64.CHROUT + iny + bne - ++ rts + } + +; ---- print pstring (length as first byte) from X/Y, returns str len in Y +print_pstring ; (address: XY) -> (A?, X?, Y) + asm { + stx c64.SCRATCH_ZP1 + sty c64.SCRATCH_ZP2 + ldy #0 + lda (c64.SCRATCH_ZP1),y + beq + + tax +- iny + lda (c64.SCRATCH_ZP1),y + jsr c64.CHROUT + dex + bne - ++ rts ; output string length is in Y + } + + +; ---- print pstring in memory immediately following the fcall instruction (don't use call!) +print_pimmediate + asm { + tsx + lda $102,x + tay ; put high byte in y + lda $101,x + tax ; and low byte in x. + inx + bne + + iny ++ jsr print_pstring ; print string in XY, returns string length in y. + tya + tsx + clc + adc $101,x ; add content of 1st (length) byte to return addr. + bcc + ; if that made the low byte roll over to 00, + inc $102,x ; then increment the high byte too. ++ clc + adc #1 ; now add 1 for the length byte itself. + sta $101,x + bne + ; if that made it (the low byte) roll over to 00, + inc $102,x ; increment the high byte of the return addr too. 
++ rts + } + + +; ---- A to decimal string in Y/X/A (100s in Y, 10s in X, 1s in A) +byte2decimal ; (ubyte: A) -> (Y, X, A) + asm { + ldy #$2f + ldx #$3a + sec +- iny + sbc #100 + bcs - +- dex + adc #10 + bmi - + adc #$2f + rts + } + +; ---- A to hex string in XY (first hex char in X, second hex char in Y) +byte2hex ; (ubyte: A) -> (X, Y, A?) + asm { + pha + and #$0f + tax + ldy hex_digits,x + pla + lsr a + lsr a + lsr a + lsr a + tax + lda hex_digits,x + tax + rts + +hex_digits .text "0123456789abcdef" ; can probably be reused for other stuff as well + } + + + +; Convert an 16 bit binary value to BCD +; +; This function converts a 16 bit binary value in X/Y into a 24 bit BCD. It +; works by transferring one bit a time from the source and adding it +; into a BCD value that is being doubled on each iteration. As all the +; arithmetic is being done in BCD the result is a binary to decimal +; conversion. + + var .array(3) word2bcd_bcdbuff + +word2bcd ; (address: XY) -> (A?, X?) + asm { + stx c64.SCRATCH_ZP1 + sty c64.SCRATCH_ZP2 + sed ; switch to decimal mode + lda #0 ; ensure the result is clear + sta word2bcd_bcdbuff+0 + sta word2bcd_bcdbuff+1 + sta word2bcd_bcdbuff+2 + ldx #16 ; the number of source bits + +- asl c64.SCRATCH_ZP1 ; shift out one bit + rol c64.SCRATCH_ZP2 + lda word2bcd_bcdbuff+0 ; and add into result + adc word2bcd_bcdbuff+0 + sta word2bcd_bcdbuff+0 + lda word2bcd_bcdbuff+1 ; propagating any carry + adc word2bcd_bcdbuff+1 + sta word2bcd_bcdbuff+1 + lda word2bcd_bcdbuff+2 ; ... thru whole result + adc word2bcd_bcdbuff+2 + sta word2bcd_bcdbuff+2 + dex ; and repeat for next bit + bne - + cld ; back to binary + rts + } + + +; ---- convert 16 bit word in X/Y into decimal string into memory 'word2decimal_output' + var .array(5) word2decimal_output + +word2decimal ; (address: XY) -> (A?, X?, Y?) 
+ asm { + jsr word2bcd + lda word2bcd_bcdbuff+2 + clc + adc #'0' + sta word2decimal_output + ldy #1 + lda word2bcd_bcdbuff+1 + jsr + + lda word2bcd_bcdbuff+0 + ++ pha + lsr a + lsr a + lsr a + lsr a + clc + adc #'0' + sta word2decimal_output,y + iny + pla + and #$0f + adc #'0' + sta word2decimal_output,y + iny + rts + } + +; ---- print the byte in A in decimal form, with left padding 0s (3 positions total) +print_byte_decimal0 ; (ubyte: A) -> (A?, X?, Y?) + asm { + jsr byte2decimal + pha + tya + jsr c64.CHROUT + txa + jsr c64.CHROUT + pla + jmp c64.CHROUT + } + +; ---- print the byte in A in decimal form, without left padding 0s +print_byte_decimal ; (ubyte: A) -> (A?, X?, Y?) + asm { + jsr byte2decimal + pha + tya + cmp #'0' + beq + + jsr c64.CHROUT ++ txa + cmp #'0' + beq + + jsr c64.CHROUT ++ pla + jmp c64.CHROUT + } + +; ---- print the byte in A in hex form +print_byte_hex ; (ubyte: A) -> (A?, X?, Y?) + asm { + jsr byte2hex + txa + jsr c64.CHROUT + tya + jmp c64.CHROUT + } + +; ---- print the word in X/Y in decimal form, with left padding 0s (5 positions total) +print_word_decimal0 ; (address: XY) -> (A?, X?, Y?) + asm { + jsr word2decimal + lda word2decimal_output + jsr c64.CHROUT + lda word2decimal_output+1 + jsr c64.CHROUT + lda word2decimal_output+2 + jsr c64.CHROUT + lda word2decimal_output+3 + jsr c64.CHROUT + lda word2decimal_output+4 + jmp c64.CHROUT + } + +; ---- print the word in X/Y in decimal form, without left padding 0s +print_word_decimal ; (address: XY) -> (A?, X? Y?) 
+ asm { + jsr word2decimal + ldy #0 + lda word2decimal_output + cmp #'0' + bne _pr_decimal + iny + lda word2decimal_output+1 + cmp #'0' + bne _pr_decimal + iny + lda word2decimal_output+2 + cmp #'0' + bne _pr_decimal + iny + lda word2decimal_output+3 + cmp #'0' + bne _pr_decimal + iny + +_pr_decimal + lda word2decimal_output,y + jsr c64.CHROUT + iny + cpy #5 + bcc _pr_decimal + rts + } +} diff --git a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..8cc8b9027 --- /dev/null +++ b/mypy.ini @@ -0,0 +1,4 @@ +[mypy] +follow_imports = normal +ignore_missing_imports = True +incremental = True diff --git a/reference.txt b/reference.txt new file mode 100644 index 000000000..462d0cc15 --- /dev/null +++ b/reference.txt @@ -0,0 +1,386 @@ +------------------------------------------------------------ +il65 - "Intermediate Language for 6502/6510 microprocessors" +------------------------------------------------------------ + Written by Irmen de Jong (irmen@razorvine.net) + License: GNU GPL 3.0, see LICENSE +------------------------------------------------------------ + + +The python program parses it and generates 6502 assembler code. +It uses the 64tass macro cross assembler to assemble it into binary files. + + + +Memory Model +------------ + +Zero page: $00 - $ff +Hardware stack: $100 - $1ff +Free RAM/ROM: $0200 - $ffff + +Reserved: + +data direction $00 +bank select $01 +NMI VECTOR $fffa +RESET VECTOR $fffc +IRQ VECTOR $fffe + +A particular 6502/6510 machine such as the Commodore-64 will have many other +special addresses due to: + - ROMs installed in the machine (basic, kernel and character generator roms) + - memory-mapped I/O registers (for the video and sound chip for example) + - RAM areas used for screen graphics and sprite data. + + +Usable Hardware registers: + A, X, Y, + AX, AY, XY (16-bit combined register pairs) + SC (status register Carry flag) + These cannot occur as variable names - they will always refer to the hardware registers. 
+ + +The zero page locations $02-$ff can be regarded as 254 other registers. +Free zero page addresses on the C-64: + $02,$03 # reserved as scratch addresses + $04,$05 + $06 + $0a + $2a + $52 + $93 + $f7,$f8 + $f9,$fa + $fb,$fc + $fd,$fe + + + +IL program parsing structure: +----------------------------- + + +OUTPUT MODES: +------------- +output raw ; no load address bytes +output prg ; include the first two load address bytes, (default is $0801), no basic program +output prg,sys ; include the first two load address bytes, basic start program with sys call to code, default code start + ; immediately after the basic program at $081d, or beyond. + +address $0801 ; override program start address (default is set to $c000 for raw mode and $0801 for c-64 prg mode) + ; cannot be used if output mode is prg,sys because basic programs always have to start at $0801 + + +data types: + byte 8 bits $8f (unsigned, @todo signed bytes) + int 16 bits $8fee (unsigned, @todo signed ints) + bool true/false (aliases for the integer values 1 and 0, not a true datatype by itself) + char '@' (converted to a byte) + float 40 bits 1.2345 (stored in 5-byte cbm MFLPT format) + @todo 24 and 32 bits integers, unsigned and signed? + string 0-terminated sequence of bytes "hello." (implicit 0-termination byte) + pstring sequence of bytes where first byte is the length. (no 0-termination byte) + For strings, both petscii and screencode variants can be written in source, they will be translated at compile/assembler time. + + + Note: for many floating point operations, the compiler uses routines in the C64 BASIC and KERNAL ROMs. + So they will only work if the BASIC ROM (and KERNAL ROM) are banked in. + largest 5-byte MFLPT float: 1.7014118345e+38 (negative: -1.7014118345e+38) + + + Note: with the # prefix you can take the address of something. This is sometimes useful, + for instance when you want to manipulate the ADDRESS of a memory mapped variable rather than + the value it represents. 
You can take the address of a string as well, but the compiler already + treats those as a value that you manipulate via its address, so the # is ignored here. + + + +BLOCKS +------ + +~ blockname [address] { + statements +} + +The blockname "ZP" is reserved and always means the ZeroPage. Its start address is always set to $04, +because $00/$01 are used by the hardware and $02/$03 are reserved as general purpose scratch registers. + +Block names cannot occur more than once, EXCEPT 'ZP' where the contents of every occurrence of it are merged. +Block address must be >= $0200 (because $00-$ff is the ZP and $100-$1ff is the cpu stack) + +You can omit the blockname but then you can only refer to the contents of the block via its absolute address, +which is required in this case. If you omit both, the block is ignored altogether (and a warning is displayed). + + +IMPORTING, INCLUDING and BINARY-INCLUDING files +----------------------------------------------- + +import "filename[.ill]" + Can only be used outside of a block (usually at the top of your file). + Reads everything from the named IL65 file at this point and compiles it as a normal part of the program. + +asminclude "filename.txt", scopelabel + Can only be used in a block. + The assembler will include the file as asm source text at this point, il65 will not process this at all. + The scopelabel will be used as a prefix to access the labels from the included source code, + otherwise you would risk symbol redefinitions or duplications. + +asmbinary "filename.bin" [, offset[, length]] + Can only be used in a block. + The assembler will include the file as binary bytes at this point, il65 will not process this at all. + The optional offset and length can be used to select a particular piece of the file.
+ + + +MACROS +------ + +@todo macros are meta-code (written in Python syntax) that actually runs in a preprocessing step +during the compilation, and produces an output value that is then substituted at that point in the input source. +Allows us to create pre calculated sine tables and such. Something like: + + var .array sinetable ``[sin(x) * 10 for x in range(100)]`` + + + +EXPRESSIONS +----------- + +In most places where a number or other value is expected, you can use just the number, or a full constant expression. +The expression is parsed and evaluated by Python itself at compile time, and the (constant) resulting value is used in its place. +Of course the special il65 syntax for hexadecimal numbers ($xxxx), binary numbers (%bbbbbb), +and the address-of (#xxxx) is supported. Other than that it must be valid Python syntax. +Expressions can contain function calls to the math library (sin, cos, etc) and you can also use +all builtin functions (max, avg, min, sum etc). They can also reference identifiers defined elsewhere in your code, +if this makes sense. + +The syntax "[address]" means: the contents of the memory at address. +By default, if not otherwise known, a single byte is assumed. You can add the ".byte" or ".word" or ".float" suffix +to make it clear what data type the address points to. + +Everything after a semicolon ';' is a comment and is ignored. +# @todo Everything after a double semicolon ';;' is a comment and is ignored, but is copied into the resulting assembly source code. + + + +FLOW CONTROL +------------ + +Required building blocks: additional forms of 'go' statement: including an if clause, comparison statement. + +- a primitive conditional branch instruction (special case of 'go'): directly translates to a branch instruction: + if[_XX] go