From 2110e7afeff6fe6d581df99b2de7a49e601616db Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Thu, 21 Dec 2017 22:16:46 +0100 Subject: [PATCH] parsing now uses preprocessed symbol table and no longer fixes placeholders in the second parse phase --- il65/__main__.py | 4 +- il65/astparse.py | 25 +-- il65/{il65.py => codegen.py} | 178 +++++----------- il65/main.py | 50 +++++ il65/parse.py | 403 ++++++++++++++++------------------- il65/preprocess.py | 2 +- il65/symbols.py | 23 +- tests/test_floats.py | 62 +++--- testsource/calls.ill | 12 ++ testsource/source1.ill | 4 +- 10 files changed, 365 insertions(+), 398 deletions(-) rename il65/{il65.py => codegen.py} (87%) create mode 100644 il65/main.py diff --git a/il65/__main__.py b/il65/__main__.py index 1cc918a24..521cc308c 100644 --- a/il65/__main__.py +++ b/il65/__main__.py @@ -5,5 +5,5 @@ Written by Irmen de Jong (irmen@razorvine.net) License: GNU GPL 3.0, see LICENSE """ -from . import il65 -il65.main() +from . import main +main.main() diff --git a/il65/astparse.py b/il65/astparse.py index 5badd18cb..8fa3e4a74 100644 --- a/il65/astparse.py +++ b/il65/astparse.py @@ -61,33 +61,33 @@ class SourceLine: return text -def parse_expr_as_int(text: str, context: Optional[SymbolTable], sourceref: SourceRef, *, +def parse_expr_as_int(text: str, context: Optional[SymbolTable], ppcontext: Optional[SymbolTable], sourceref: SourceRef, *, minimum: int=0, maximum: int=0xffff) -> int: - result = parse_expr_as_primitive(text, context, sourceref, minimum=minimum, maximum=maximum) + result = parse_expr_as_primitive(text, context, ppcontext, sourceref, minimum=minimum, maximum=maximum) if isinstance(result, int): return result src = SourceLine(text, sourceref) raise src.to_error("int expected, not " + type(result).__name__) -def parse_expr_as_number(text: str, context: Optional[SymbolTable], sourceref: SourceRef, *, +def parse_expr_as_number(text: str, context: Optional[SymbolTable], ppcontext: Optional[SymbolTable], sourceref: SourceRef, *, minimum: float=FLOAT_MAX_NEGATIVE, maximum: float=FLOAT_MAX_POSITIVE) -> Union[int, float]: - result = parse_expr_as_primitive(text, context, sourceref, minimum=minimum, maximum=maximum) + result = parse_expr_as_primitive(text, context, ppcontext, sourceref, minimum=minimum, maximum=maximum) if isinstance(result, (int, float)): return result src = SourceLine(text, sourceref) raise src.to_error("int or float expected, not " + type(result).__name__) -def parse_expr_as_string(text: str, context: Optional[SymbolTable], sourceref: SourceRef) -> str: - result = parse_expr_as_primitive(text, context, sourceref) +def parse_expr_as_string(text: str, context: Optional[SymbolTable], ppcontext: Optional[SymbolTable], sourceref: SourceRef) -> str: + result = parse_expr_as_primitive(text, context, ppcontext, sourceref) if isinstance(result, str): return result src = SourceLine(text, sourceref) raise src.to_error("string expected, not " + type(result).__name__) -def parse_expr_as_primitive(text: str, context: Optional[SymbolTable], sourceref: SourceRef, *, +def parse_expr_as_primitive(text: str, context: Optional[SymbolTable], ppcontext: Optional[SymbolTable], sourceref: SourceRef, *, minimum: float = FLOAT_MAX_NEGATIVE, maximum: float = FLOAT_MAX_POSITIVE) -> PrimitiveType: src = SourceLine(text, sourceref) text = src.preprocess() @@ -96,7 +96,7 @@ def parse_expr_as_primitive(text: str, context: Optional[SymbolTable], sourceref except SyntaxError as x: raise src.to_error(str(x)) if isinstance(node, ast.Expression): - result = ExpressionTransformer(src, context).evaluate(node) + result = ExpressionTransformer(src, context, ppcontext).evaluate(node) else: raise TypeError("ast.Expression expected") if isinstance(result, bool): @@ -118,10 +118,11 @@ def parse_statement(text: str, sourceref: SourceRef) -> int: # @todo in progr class EvaluatingTransformer(ast.NodeTransformer): - def __init__(self, src: SourceLine, context: SymbolTable) -> None: + def __init__(self, src: SourceLine, context: SymbolTable, ppcontext: SymbolTable) -> None: super().__init__() self.src = src self.context = context + self.ppcontext = ppcontext def error(self, message: str, column: int=0) -> ParseError: if column: @@ -136,14 +137,14 @@ class EvaluatingTransformer(ast.NodeTransformer): code = compile(node, self.src.sourceref.file, mode="eval") if self.context: globals = None - locals = self.context.as_eval_dict() + locals = self.context.as_eval_dict(self.ppcontext) else: globals = {"__builtins__": {}} locals = None try: result = eval(code, globals, locals) except Exception as x: - raise self.src.to_error(str(x)) + raise self.src.to_error(str(x)) from x else: if type(result) is bool: return int(result) @@ -202,5 +203,5 @@ class ExpressionTransformer(EvaluatingTransformer): if __name__ == "__main__": symbols = SymbolTable("", None, None) symbols.define_variable("derp", SourceRef("", 1), DataType.BYTE, address=2345) - result = parse_expr_as_primitive("2+#derp", symbols, SourceRef("", 1)) + result = parse_expr_as_primitive("2+#derp", symbols, None, SourceRef("", 1)) print("EXPRESSION RESULT:", result) diff --git a/il65/il65.py b/il65/codegen.py similarity index 87% rename from il65/il65.py rename to il65/codegen.py index d33dc80cd..a4052877f 100644 --- a/il65/il65.py +++ b/il65/codegen.py @@ -1,25 +1,20 @@ -#! /usr/bin/env python3 - """ Intermediate Language for 6502/6510 microprocessors, codename 'Sick' -This is the main program and assembly code generator (from the parse tree) +This is the assembly code generator (from the parse tree) Written by Irmen de Jong (irmen@razorvine.net) License: GNU GPL 3.0, see LICENSE """ -import os import io import math import datetime import subprocess import contextlib -import argparse from functools import partial from typing import TextIO, Set, Union -from .preprocess import PreprocessingParser -from .parse import ProgramFormat, Parser, ParseResult, Optimizer -from .symbols import Zeropage, DataType, VariableDef, REGISTER_WORDS, FLOAT_MAX_NEGATIVE, FLOAT_MAX_POSITIVE +from .parse import ProgramFormat, ParseResult, Parser +from .symbols import Zeropage, DataType, VariableDef, SubroutineDef, REGISTER_WORDS, FLOAT_MAX_NEGATIVE, FLOAT_MAX_POSITIVE class CodeError(Exception): @@ -35,6 +30,7 @@ class CodeGenerator: self.cur_block = None # type: ParseResult.Block def generate(self) -> None: + print("\ngenerating assembly code") self.sanitycheck() self.header() self.initialize_variables() @@ -75,7 +71,7 @@ class CodeGenerator: if self.parsed.format == ProgramFormat.PRG: if self.parsed.with_sys: self.p("; ---- basic program with sys call ----") - self.p("* = " + self.to_hex(self.parsed.start_address)) + self.p("* = " + Parser.to_hex(self.parsed.start_address)) year = datetime.datetime.now().year self.p("\t\t.word (+), {:d}".format(year)) self.p("\t\t.null $9e, format(' %d ', _il65_sysaddr), $3a, $8f, ' il65 by idj'") @@ -83,20 +79,10 @@ class CodeGenerator: self.p("_il65_sysaddr\t\t; assembly code starts here\n") else: self.p("; ---- program without sys call ----") - self.p("* = " + self.to_hex(self.parsed.start_address) + "\n") + self.p("* = " + Parser.to_hex(self.parsed.start_address) + "\n") if self.parsed.format == ProgramFormat.RAW: self.p("; ---- raw assembler program ----") - self.p("* = " + self.to_hex(self.parsed.start_address) + "\n") - - @staticmethod - def to_hex(number: int) -> str: - # 0..255 -> "$00".."$ff" - # 256..65536 -> "$0100".."$ffff" - if 0 <= number < 0x100: - return "${:02x}".format(number) - if number < 0x10000: - return "${:04x}".format(number) - raise OverflowError(number) + self.p("* = " + Parser.to_hex(self.parsed.start_address) + "\n") @staticmethod def to_mflpt5(number: float) -> bytearray: @@ -232,7 +218,7 @@ class CodeGenerator: if subroutines: self.p("\n; external subroutines") for subdef in subroutines: - self.p("\t\t{:s} = {:s}".format(subdef.name, self.to_hex(subdef.address))) + self.p("\t\t{:s} = {:s}".format(subdef.name, Parser.to_hex(subdef.address))) self.p("; end external subroutines") for stmt in block.statements: self.generate_statement(stmt) @@ -245,14 +231,14 @@ class CodeGenerator: for vardef in mem_vars: # create a definition for variables at a specific place in memory (memory-mapped) if vardef.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT): - self.p("\t\t{:s} = {:s}\t; {:s}".format(vardef.name, self.to_hex(vardef.address), vardef.type.name.lower())) + self.p("\t\t{:s} = {:s}\t; {:s}".format(vardef.name, Parser.to_hex(vardef.address), vardef.type.name.lower())) elif vardef.type == DataType.BYTEARRAY: - self.p("\t\t{:s} = {:s}\t; array of {:d} bytes".format(vardef.name, self.to_hex(vardef.address), vardef.length)) + self.p("\t\t{:s} = {:s}\t; array of {:d} bytes".format(vardef.name, Parser.to_hex(vardef.address), vardef.length)) elif vardef.type == DataType.WORDARRAY: - self.p("\t\t{:s} = {:s}\t; array of {:d} words".format(vardef.name, self.to_hex(vardef.address), vardef.length)) + self.p("\t\t{:s} = {:s}\t; array of {:d} words".format(vardef.name, Parser.to_hex(vardef.address), vardef.length)) elif vardef.type == DataType.MATRIX: self.p("\t\t{:s} = {:s}\t; matrix {:d} by {:d} = {:d} bytes" - .format(vardef.name, self.to_hex(vardef.address), vardef.matrixsize[0], vardef.matrixsize[1], vardef.length)) + .format(vardef.name, Parser.to_hex(vardef.address), vardef.matrixsize[0], vardef.matrixsize[1], vardef.length)) else: raise ValueError("invalid var type") non_mem_vars = [vi for vi in block.symbols.iter_variables() if vi.allocate] @@ -263,12 +249,12 @@ class CodeGenerator: if vardef.type in (DataType.BYTE, DataType.WORD, DataType.FLOAT): if vardef.address: assert block.name == "ZP", "only ZP-variables can be put on an address" - self.p("\t\t{:s} = {:s}".format(vardef.name, self.to_hex(vardef.address))) + self.p("\t\t{:s} = {:s}".format(vardef.name, Parser.to_hex(vardef.address))) else: if vardef.type == DataType.BYTE: - self.p("{:s}\t\t.byte {:s}".format(vardef.name, self.to_hex(int(vardef.value)))) + self.p("{:s}\t\t.byte {:s}".format(vardef.name, Parser.to_hex(int(vardef.value)))) elif vardef.type == DataType.WORD: - self.p("{:s}\t\t.word {:s}".format(vardef.name, self.to_hex(int(vardef.value)))) + self.p("{:s}\t\t.word {:s}".format(vardef.name, Parser.to_hex(int(vardef.value)))) elif vardef.type == DataType.FLOAT: self.p("{:s}\t\t.byte ${:02x}, ${:02x}, ${:02x}, ${:02x}, ${:02x}" .format(vardef.name, *self.to_mflpt5(float(vardef.value)))) @@ -312,7 +298,7 @@ class CodeGenerator: else: raise CodeError("unknown variable type " + str(vardef.type)) - def generate_statement(self, stmt: ParseResult._Stmt) -> None: + def generate_statement(self, stmt: ParseResult._AstNode) -> None: if isinstance(stmt, ParseResult.ReturnStmt): if stmt.a: if isinstance(stmt.a, ParseResult.IntegerValue): @@ -348,7 +334,7 @@ class CodeGenerator: else: self.p("\t\tde{:s}".format(stmt.what.register.lower())) elif isinstance(stmt.what, ParseResult.MemMappedValue): - r_str = stmt.what.name or self.to_hex(stmt.what.address) + r_str = stmt.what.name or Parser.to_hex(stmt.what.address) if stmt.what.datatype == DataType.BYTE: if stmt.howmuch == 1: self.p("\t\tinc " + r_str) @@ -375,51 +361,35 @@ class CodeGenerator: elif stmt.howmuch < 0: raise NotImplementedError("decr by > 1") # XXX elif isinstance(stmt, ParseResult.CallStmt): - is_indirect = False - if stmt.call_label: - call_target = stmt.call_label - if stmt.call_module: - call_target = stmt.call_module + "." + stmt.call_label - elif stmt.address is not None: - call_target = self.to_hex(stmt.address) - else: - assert stmt.indirect_pointer is not None - if isinstance(stmt.indirect_pointer, int): - call_target = self.to_hex(stmt.indirect_pointer) - else: - call_target = stmt.indirect_pointer - is_indirect = True - if stmt.subroutine: - assert not is_indirect - if stmt.subroutine.clobbered_registers: + # the argument assignments have already been generated via separate assignment statements. + if isinstance(stmt.targetdef, SubroutineDef): + assert not stmt.is_indirect + clobbered = set() # type: Set[str] + if stmt.targetdef.clobbered_registers: if stmt.preserve_regs: # @todo make this work with the separate assignment statements for the parameters.. :( - clobbered = stmt.subroutine.clobbered_registers + clobbered = stmt.targetdef.clobbered_registers + with self.preserving_registers(clobbered): + self.p("\t\tjsr " + stmt.target) + if stmt.is_goto: + self.p("\t\trts") + return + if stmt.is_indirect: + if stmt.is_goto: + # no need to preserve registers for a goto + if stmt.target in REGISTER_WORDS: + self.p("\t\tst{:s} {:s}".format(stmt.target[0].lower(), Parser.to_hex(Zeropage.SCRATCH_B1))) + self.p("\t\tst{:s} {:s}".format(stmt.target[1].lower(), Parser.to_hex(Zeropage.SCRATCH_B2))) + self.p("\t\tjmp ({:s})".format(Parser.to_hex(Zeropage.SCRATCH_B1))) else: - clobbered = set() - with self.preserving_registers(clobbered): - self.p("\t\tjsr " + call_target) - if stmt.is_goto: - self.p("\t\trts") - return - if stmt.is_goto: - if is_indirect: - if call_target in REGISTER_WORDS: - self.p("\t\tst{:s} {:s}".format(call_target[0].lower(), self.to_hex(Zeropage.SCRATCH_B1))) - self.p("\t\tst{:s} {:s}".format(call_target[1].lower(), self.to_hex(Zeropage.SCRATCH_B2))) - self.p("\t\tjmp ({:s})".format(self.to_hex(Zeropage.SCRATCH_B1))) - else: - self.p("\t\tjmp ({:s})".format(call_target)) + self.p("\t\tjmp ({:s})".format(stmt.target)) else: - self.p("\t\tjmp " + call_target) - else: - preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set() - with self.preserving_registers(preserve_regs): - if is_indirect: - if call_target in REGISTER_WORDS: + preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set() + with self.preserving_registers(preserve_regs): + if stmt.target in REGISTER_WORDS: if stmt.preserve_regs: # cannot use zp scratch - self.p("\t\tst{:s} ++".format(call_target[0].lower())) - self.p("\t\tst{:s} +++".format(call_target[1].lower())) + self.p("\t\tst{:s} ++".format(stmt.target[0].lower())) + self.p("\t\tst{:s} +++".format(stmt.target[1].lower())) self.p("\t\tjsr +") self.p("\t\tjmp ++++") self.p("+\t\tjmp (+)") @@ -427,19 +397,22 @@ class CodeGenerator: self.p("+\t\t.byte 0\t; hi") self.p("+") else: - self.p("\t\tst{:s} {:s}".format(call_target[0].lower(), self.to_hex(Zeropage.SCRATCH_B1))) - self.p("\t\tst{:s} {:s}".format(call_target[1].lower(), self.to_hex(Zeropage.SCRATCH_B2))) + self.p("\t\tst{:s} {:s}".format(stmt.target[0].lower(), Parser.to_hex(Zeropage.SCRATCH_B1))) + self.p("\t\tst{:s} {:s}".format(stmt.target[1].lower(), Parser.to_hex(Zeropage.SCRATCH_B2))) self.p("\t\tjsr +") self.p("\t\tjmp ++") - self.p("+\t\tjmp ({:s})".format(self.to_hex(Zeropage.SCRATCH_B1))) + self.p("+\t\tjmp ({:s})".format(Parser.to_hex(Zeropage.SCRATCH_B1))) self.p("+") else: self.p("\t\tjsr +") self.p("\t\tjmp ++") - self.p("+\t\tjmp ({:s})".format(call_target)) + self.p("+\t\tjmp ({:s})".format(stmt.target)) self.p("+") - else: - self.p("\t\tjsr " + call_target) + else: + preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set() + with self.preserving_registers(preserve_regs): + self.p("\t\tjsr " + stmt.target) + elif isinstance(stmt, ParseResult.InlineAsm): self.p("\t\t; inline asm, src l. {:d}".format(stmt.lineno)) for line in stmt.asmlines: @@ -502,7 +475,7 @@ class CodeGenerator: def generate_store_immediate_float(self, mmv: ParseResult.MemMappedValue, floatvalue: float, mflpt: bytearray, emit_pha: bool=True) -> None: - target = mmv.name or self.to_hex(mmv.address) + target = mmv.name or Parser.to_hex(mmv.address) if emit_pha: self.p("\t\tpha\t\t\t; {:s} = {}".format(target, floatvalue)) else: @@ -515,7 +488,7 @@ class CodeGenerator: def generate_assign_reg_to_memory(self, lv: ParseResult.MemMappedValue, r_register: str) -> None: # Memory = Register - lv_string = lv.name or self.to_hex(lv.address) + lv_string = lv.name or Parser.to_hex(lv.address) if lv.datatype == DataType.BYTE: if len(r_register) > 1: raise CodeError("cannot assign register pair to single byte memory") @@ -633,7 +606,7 @@ class CodeGenerator: assign_target = symblock.label + "." + sym.name if symblock is not self.cur_block else lv.name lvdatatype = sym.type else: - assign_target = self.to_hex(lv.address) + assign_target = Parser.to_hex(lv.address) lvdatatype = lv.datatype r_str = rvalue.name if rvalue.name else "${:x}".format(rvalue.value) if lvdatatype == DataType.BYTE: @@ -676,19 +649,19 @@ class CodeGenerator: raise CodeError("can only assign a byte to a byte") with self.preserving_registers({'A'}): self.p("\t\tlda " + r_str) - self.p("\t\tsta " + (lv.name or self.to_hex(lv.address))) + self.p("\t\tsta " + (lv.name or Parser.to_hex(lv.address))) elif lv.datatype == DataType.WORD: if rvalue.datatype == DataType.BYTE: raise NotImplementedError # XXX with self.preserving_registers({'A'}): - l_str = lv.name or self.to_hex(lv.address) + l_str = lv.name or Parser.to_hex(lv.address) self.p("\t\tlda #0") self.p("\t\tsta " + l_str) self.p("\t\tlda " + r_str) self.p("\t\tsta {:s}+1".format(l_str)) elif rvalue.datatype == DataType.WORD: with self.preserving_registers({'A'}): - l_str = lv.name or self.to_hex(lv.address) + l_str = lv.name or Parser.to_hex(lv.address) self.p("\t\tlda {:s}".format(r_str)) self.p("\t\tsta {:s}".format(l_str)) self.p("\t\tlda {:s}+1".format(r_str)) @@ -704,7 +677,7 @@ class CodeGenerator: with self.preserving_registers({'A'}): self.p("\t\tlda #" + char_str) if not lv.name: - self.p("\t\tsta " + self.to_hex(lv.address)) + self.p("\t\tsta " + Parser.to_hex(lv.address)) return # assign char value to a memory location by symbol name symblock, sym = self.cur_block.lookup(lv.name) @@ -758,7 +731,7 @@ class CodeGenerator: if lv.datatype != DataType.WORD: raise CodeError("need word memory type for string address assignment") if rvalue.name: - assign_target = lv.name if lv.name else self.to_hex(lv.address) + assign_target = lv.name if lv.name else Parser.to_hex(lv.address) self.p("\t\tlda #<{:s}".format(rvalue.name)) self.p("\t\tsta " + assign_target) self.p("\t\tlda #>{:s}".format(rvalue.name)) @@ -821,38 +794,3 @@ class Assembler64Tass: subprocess.check_call(args) except subprocess.CalledProcessError as x: print("assembler failed with returncode", x.returncode) - - -def main() -> None: - description = "Compiler for IL65 language, code name 'Sick'" - ap = argparse.ArgumentParser(description=description) - ap.add_argument("-o", "--output", help="output directory") - ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile") - args = ap.parse_args() - assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm" - program_filename = os.path.splitext(args.sourcefile)[0] + ".prg" - if args.output: - os.makedirs(args.output, mode=0o700, exist_ok=True) - assembly_filename = os.path.join(args.output, os.path.split(assembly_filename)[1]) - program_filename = os.path.join(args.output, os.path.split(program_filename)[1]) - - print("\n" + description) - - pp = PreprocessingParser(args.sourcefile) - sourcelines, symbols = pp.preprocess() - symbols.print_table(True) - - p = Parser(args.sourcefile, args.output, sourcelines, ppsymbols=symbols) - parsed = p.parse() - if parsed: - opt = Optimizer(parsed) - parsed = opt.optimize() - cg = CodeGenerator(parsed) - cg.generate() - cg.optimize() - with open(assembly_filename, "wt") as out: - cg.write_assembly(out) - assembler = Assembler64Tass(parsed.format) - assembler.assemble(assembly_filename, program_filename) - print("Output file: ", program_filename) - print() diff --git a/il65/main.py b/il65/main.py new file mode 100644 index 000000000..02af875d8 --- /dev/null +++ b/il65/main.py @@ -0,0 +1,50 @@ +#! /usr/bin/env python3 + +""" +Intermediate Language for 6502/6510 microprocessors, codename 'Sick' +This is the main program that drives the rest. + +Written by Irmen de Jong (irmen@razorvine.net) +License: GNU GPL 3.0, see LICENSE +""" + +import os +import argparse +from .parse import Parser, Optimizer +from .preprocess import PreprocessingParser +from .codegen import CodeGenerator, Assembler64Tass + + +def main() -> None: + description = "Compiler for IL65 language, code name 'Sick'" + ap = argparse.ArgumentParser(description=description) + ap.add_argument("-o", "--output", help="output directory") + ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile") + args = ap.parse_args() + assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm" + program_filename = os.path.splitext(args.sourcefile)[0] + ".prg" + if args.output: + os.makedirs(args.output, mode=0o700, exist_ok=True) + assembly_filename = os.path.join(args.output, os.path.split(assembly_filename)[1]) + program_filename = os.path.join(args.output, os.path.split(program_filename)[1]) + + print("\n" + description) + + pp = PreprocessingParser(args.sourcefile) + sourcelines, symbols = pp.preprocess() + symbols.print_table(True) + + p = Parser(args.sourcefile, args.output, sourcelines, ppsymbols=symbols) + parsed = p.parse() + if parsed: + opt = Optimizer(parsed) + parsed = opt.optimize() + cg = CodeGenerator(parsed) + cg.generate() + cg.optimize() + with open(assembly_filename, "wt") as out: + cg.write_assembly(out) + assembler = Assembler64Tass(parsed.format) + assembler.assemble(assembly_filename, program_filename) + print("Output file: ", program_filename) + print() diff --git a/il65/parse.py b/il65/parse.py index 9cf952ec9..9b1df1fd0 100644 --- a/il65/parse.py +++ b/il65/parse.py @@ -42,7 +42,7 @@ class ParseResult: self.sourceref = sourceref self.address = 0 self.name = name - self.statements = [] # type: List[ParseResult._Stmt] + self.statements = [] # type: List[ParseResult._AstNode] self.symbols = SymbolTable(name, parent_scope, self) @property @@ -67,7 +67,7 @@ class ParseResult: return None, None def flatten_statement_list(self) -> None: - if all(isinstance(stmt, ParseResult._Stmt) for stmt in self.statements): + if all(isinstance(stmt, ParseResult._AstNode) for stmt in self.statements): # this is the common case return statements = [] @@ -75,7 +75,7 @@ class ParseResult: if isinstance(stmt, (tuple, list)): statements.extend(stmt) else: - assert isinstance(stmt, ParseResult._Stmt) + assert isinstance(stmt, ParseResult._AstNode) statements.append(stmt) self.statements = statements @@ -90,13 +90,6 @@ class ParseResult: return False, "cannot assign to a constant" return False, "incompatible value for assignment" - class PlaceholderSymbol(Value): - def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: - return True, "" - - def __str__(self): - return "".format(self.name) - class IntegerValue(Value): def __init__(self, value: Optional[int], *, datatype: DataType=None, name: str=None) -> None: if type(value) is int: @@ -213,8 +206,6 @@ class ParseResult: if range_error: return False, range_error return True, "" - if isinstance(other, ParseResult.PlaceholderSymbol): - return True, "" if self.datatype == DataType.BYTE: if other.datatype != DataType.BYTE: return False, "(unsigned) byte required" @@ -251,8 +242,6 @@ class ParseResult: def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: if self.constant: return False, "cannot assign to a constant" - if isinstance(other, ParseResult.PlaceholderSymbol): - return True, "" if self.datatype == DataType.BYTE: if isinstance(other, (ParseResult.IntegerValue, ParseResult.RegisterValue, ParseResult.MemMappedValue)): if other.datatype == DataType.BYTE: @@ -286,16 +275,15 @@ class ParseResult: return False, "(unsigned) word required" return False, "incompatible value for assignment" - class _Stmt: - def resolve_symbol_references(self, parser: 'Parser') -> None: # @todo don't need this when using ppsymbols? - pass + class _AstNode: # @todo merge Value with this? + pass - class Label(_Stmt): + class Label(_AstNode): def __init__(self, name: str, lineno: int) -> None: self.name = name self.lineno = lineno - class AssignmentStmt(_Stmt): + class AssignmentStmt(_AstNode): def __init__(self, leftvalues: List['ParseResult.Value'], right: 'ParseResult.Value', lineno: int) -> None: self.leftvalues = leftvalues self.right = right @@ -304,32 +292,6 @@ class ParseResult: def __str__(self): return "".format(str(self.right), ",".join(str(lv) for lv in self.leftvalues)) - def resolve_symbol_references(self, parser: 'Parser') -> None: - cur_block = parser.cur_block - if isinstance(self.right, ParseResult.PlaceholderSymbol): - value = parser.parse_expression(self.right.name, cur_block) - if isinstance(value, ParseResult.PlaceholderSymbol): - raise ParseError("cannot resolve rvalue symbol: " + self.right.name, "", cur_block.sourceref) - self.right = value - lv_resolved = [] - for lv in self.leftvalues: - if isinstance(lv, ParseResult.PlaceholderSymbol): - value = parser.parse_expression(lv.name, cur_block) - if isinstance(value, ParseResult.PlaceholderSymbol): - raise ParseError("cannot resolve lvalue symbol: " + lv.name, "", cur_block.sourceref) - lv_resolved.append(value) - else: - lv_resolved.append(lv) - self.leftvalues = lv_resolved - if any(isinstance(lv, ParseResult.PlaceholderSymbol) for lv in self.leftvalues) or \ - isinstance(self.right, ParseResult.PlaceholderSymbol): - raise ParseError("unresolved placeholders in assignment statement", "", cur_block.sourceref) - # check assignability again - for lv in self.leftvalues: - assignable, reason = lv.assignable_from(self.right) - if not assignable: - raise ParseError("cannot assign {0} to {1}; {2}".format(self.right, lv, reason), "", cur_block.sourceref) - _immediate_string_vars = {} # type: Dict[str, Tuple[str, str]] def desugar_immediate_string(self, parser: 'Parser') -> None: @@ -349,7 +311,7 @@ class ParseResult: self.right.name = stringvar_name self._immediate_string_vars[self.right.value] = (cur_block.name, stringvar_name) - class ReturnStmt(_Stmt): + class ReturnStmt(_AstNode): def __init__(self, a: Optional['ParseResult.Value']=None, x: Optional['ParseResult.Value']=None, y: Optional['ParseResult.Value']=None) -> None: @@ -357,80 +319,32 @@ class ParseResult: self.x = x self.y = y - def resolve_symbol_references(self, parser: 'Parser') -> None: - if isinstance(self.a, ParseResult.PlaceholderSymbol) or \ - isinstance(self.x, ParseResult.PlaceholderSymbol) or \ - isinstance(self.y, ParseResult.PlaceholderSymbol): - cur_block = parser.cur_block - raise ParseError("unresolved placeholders in return statement", "", cur_block.sourceref) - - class IncrDecrStmt(_Stmt): + class IncrDecrStmt(_AstNode): def __init__(self, what: 'ParseResult.Value', howmuch: int) -> None: self.what = what self.howmuch = howmuch - def resolve_symbol_references(self, parser: 'Parser') -> None: - if isinstance(self.what, ParseResult.PlaceholderSymbol): - cur_block = parser.cur_block - value = parser.parse_expression(self.what.name, cur_block) - if isinstance(value, ParseResult.PlaceholderSymbol): - raise ParseError("cannot resolve symbol: " + self.what.name, "", cur_block.sourceref) - self.what = value - - class CallStmt(_Stmt): - def __init__(self, lineno: int, address: Optional[int]=None, unresolved: str=None, - arguments: List[Tuple[str, Any]]=None, is_goto: bool=False, - indirect_pointer: Optional[Union[int, str]]=None, preserve_regs: bool=True) -> None: - self.subroutine = None # type: SubroutineDef - self.unresolved = unresolved - self.is_goto = is_goto - self.preserve_regs = preserve_regs - self.call_module = "" - self.call_label = "" + class CallStmt(_AstNode): + def __init__(self, lineno: int, target: str, targetdef: Optional[SymbolDefinition]=None, *, + address: Optional[int]=None, arguments: List[Tuple[str, Any]]=None, + is_goto: bool=False, is_indirect: bool=False, preserve_regs: bool=True) -> None: self.lineno = lineno - self.arguments = arguments + self._target = target + self.targetdef = targetdef self.address = address - self.indirect_pointer = indirect_pointer - if self.indirect_pointer: - assert self.subroutine is None and self.address is None + self.arguments = arguments + self.is_goto = is_goto + self.is_indirect = is_indirect + self.preserve_regs = preserve_regs - def resolve_symbol_references(self, parser: 'Parser') -> None: - if self.unresolved: - cur_block = parser.cur_block - symblock, identifier = cur_block.lookup(self.unresolved) - if not identifier: - raise parser.PError("unknown symbol '{:s}'".format(self.unresolved), self.lineno) - if isinstance(identifier, SubroutineDef): - self.subroutine = identifier - if self.arguments is not None and len(self.arguments) != len(self.subroutine.parameters): - raise parser.PError("invalid number of arguments ({:d}, expected {:d})" - .format(len(self.arguments), len(self.subroutine.parameters)), self.lineno) - arguments = [] - for i, (argname, value) in enumerate(self.arguments or []): - pname, preg = self.subroutine.parameters[i] - if argname: - if argname != preg: - raise parser.PError("parameter mismatch ({:s}, expected {:s})".format(argname, preg), self.lineno) - else: - argname = preg - arguments.append((argname, value)) - self.arguments = arguments - elif isinstance(identifier, LabelDef): - pass - else: - raise parser.PError("invalid call target (should be label or address)", self.lineno) - if cur_block is symblock: - self.call_module, self.call_label = "", identifier.name - else: - self.call_module = symblock.label - self.call_label = identifier.name - self.unresolved = None + @property + def target(self) -> str: + return self._target if self._target else Parser.to_hex(self.address) - def desugar_call_arguments(self, parser: 'Parser') -> List['ParseResult._Stmt']: - assert not self.unresolved + def desugar_call_arguments(self, parser: 'Parser') -> List['ParseResult._AstNode']: if not self.arguments: return [self] - statements = [] # type: List[ParseResult._Stmt] + statements = [] # type: List[ParseResult._AstNode] for name, value in self.arguments: assert name is not None, "call argument should have a parameter name assigned" assignment = parser.parse_assignment("{:s}={:s}".format(name, value)) @@ -439,7 +353,7 @@ class ParseResult: statements.append(self) return statements - class InlineAsm(_Stmt): + class InlineAsm(_AstNode): def __init__(self, lineno: int, asmlines: List[str]) -> None: self.lineno = lineno self.asmlines = asmlines @@ -468,7 +382,7 @@ class Parser: self.cur_lineidx = -1 self.cur_block = None # type: ParseResult.Block self.root_scope = SymbolTable("", None, None) - self.ppsymbols = ppsymbols # symboltable from preprocess phase # @todo use this + self.ppsymbols = ppsymbols # symboltable from preprocess phase self.print_block_parsing = True def load_source(self, filename: str) -> List[Tuple[int, str]]: @@ -503,9 +417,8 @@ class Parser: raise # XXX temporary solution to get stack trace info in the event of parse errors def parse_file(self) -> ParseResult: - print("\nparsing (pass 1)", self.sourceref.file) + print("\nparsing", self.sourceref.file) self._parse_1() - print("\nparsing (pass 2)", self.sourceref.file) self._parse_2() return self.result @@ -549,12 +462,7 @@ class Parser: self.sourceref.column = 0 for block in self.result.blocks: self.cur_block = block - # resolve labels and names that were referencing unknown symbols - block.flatten_statement_list() - for index, stmt in enumerate(list(block.statements)): - stmt.resolve_symbol_references(self) # create parameter loads for calls - block.flatten_statement_list() for index, stmt in enumerate(list(block.statements)): if isinstance(stmt, ParseResult.CallStmt): self.sourceref.line = stmt.lineno @@ -564,8 +472,8 @@ class Parser: block.statements[index] = statements[0] else: block.statements[index] = statements # type: ignore - # desugar immediate string value assignments block.flatten_statement_list() + # desugar immediate string value assignments for index, stmt in enumerate(list(block.statements)): if isinstance(stmt, ParseResult.AssignmentStmt): self.sourceref.line = stmt.lineno @@ -591,7 +499,7 @@ class Parser: return self.lines[self.cur_lineidx + 1][1] return "" - def PError(self, message: str, lineno: Optional[int]=None) -> ParseError: + def PError(self, message: str, lineno: int=0, column: int=0) -> ParseError: sourceline = "" if lineno: for num, text in self.lines: @@ -600,10 +508,36 @@ class Parser: break else: lineno = self.sourceref.line + column = self.sourceref.column self.cur_lineidx = min(self.cur_lineidx, len(self.lines) - 1) if self.cur_lineidx: sourceline = self.lines[self.cur_lineidx][1].strip() - return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno)) + return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column)) + + def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: + if typestr == ".byte": + return DataType.BYTE, 1, None + elif typestr == ".word": + return DataType.WORD, 1, None + elif typestr == ".float": + return DataType.FLOAT, 1, None + elif typestr.endswith("text"): + if typestr == ".text": + return DataType.STRING, 0, None + elif typestr == ".ptext": + return DataType.STRING_P, 0, None + elif typestr == ".stext": + return DataType.STRING_S, 0, None + elif typestr == ".pstext": + return DataType.STRING_PS, 0, None + elif typestr.startswith(".array(") and typestr.endswith(")"): + return DataType.BYTEARRAY, self._size_from_arraydecl(typestr), None + elif typestr.startswith(".wordarray(") and typestr.endswith(")"): + return DataType.WORDARRAY, self._size_from_arraydecl(typestr), None + elif typestr.startswith(".matrix(") and typestr.endswith(")"): + dimensions = self._size_from_matrixdecl(typestr) + return DataType.MATRIX, dimensions[0] * dimensions[1], dimensions + raise self.PError("invalid data type: " + typestr) def parse_header(self) -> None: self.result.with_sys = False @@ -645,7 +579,7 @@ class Parser: raise self.PError("multiple occurrences of 'address'") _, _, arg = line.partition(" ") try: - self.result.start_address = parse_expr_as_int(arg, None, self.sourceref) + self.result.start_address = parse_expr_as_int(arg, None, None, self.sourceref) except ParseError: raise self.PError("invalid address") if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: @@ -692,6 +626,7 @@ class Parser: result = parser.parse() print("\ncontinuing", self.sourceref.file) if result: + # merge the symbol table of the imported file into our own self.root_scope.merge_roots(parser.root_scope) self.result.merge(result) return @@ -738,7 +673,7 @@ class Parser: continue else: try: - block_address = parse_expr_as_int(arg, None, self.sourceref) + block_address = parse_expr_as_int(arg, self.cur_block.symbols, self.ppsymbols, self.sourceref) except ParseError: raise self.PError("Invalid block address") if block_address == 0 or (block_address < 0x0200 and not is_zp_block): @@ -820,7 +755,7 @@ class Parser: def parse_memory_def(self, line: str, is_zeropage: bool=False) -> None: varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "memory") - memaddress = parse_expr_as_int(valuetext, self.cur_block.symbols, self.sourceref) + memaddress = parse_expr_as_int(valuetext, self.cur_block.symbols, self.ppsymbols, self.sourceref) if is_zeropage and memaddress > 0xff: raise self.PError("address must lie in zeropage $00-$ff") try: @@ -833,7 +768,7 @@ class Parser: varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "const") if dimensions: raise self.PError("cannot declare a constant matrix") - value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.sourceref) + value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.ppsymbols, self.sourceref) _, value = self.coerce_value(self.sourceref, datatype, value) try: self.cur_block.symbols.define_constant(varname, self.sourceref, datatype, length=length, value=value) @@ -860,7 +795,7 @@ class Parser: raise self.PError("duplicates in parameter names") results = {match.group("name") for match in re.finditer(r"\s*(?P(?:\w+)\??)\s*(?:,|$)", resultlist)} try: - address = parse_expr_as_int(address_str, None, self.sourceref) + address = parse_expr_as_int(address_str, self.cur_block.symbols, self.ppsymbols, self.sourceref) except ParseError: raise self.PError("invalid subroutine address") try: @@ -868,34 +803,9 @@ class Parser: except SymbolError as x: raise self.PError(str(x)) from x - def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: - if typestr == ".byte": - return DataType.BYTE, 1, None - elif typestr == ".word": - return DataType.WORD, 1, None - elif typestr == ".float": - return DataType.FLOAT, 1, None - elif typestr.endswith("text"): - if typestr == ".text": - return DataType.STRING, 0, None - elif typestr == ".ptext": - return DataType.STRING_P, 0, None - elif typestr == ".stext": - return DataType.STRING_S, 0, None - elif typestr == ".pstext": - return DataType.STRING_PS, 0, None - elif typestr.startswith(".array(") and typestr.endswith(")"): - return DataType.BYTEARRAY, self._size_from_arraydecl(typestr), None - elif typestr.startswith(".wordarray(") and typestr.endswith(")"): - return DataType.WORDARRAY, self._size_from_arraydecl(typestr), None - elif typestr.startswith(".matrix(") and typestr.endswith(")"): - dimensions = self._size_from_matrixdecl(typestr) - return DataType.MATRIX, dimensions[0] * dimensions[1], dimensions - raise self.PError("invalid data type: " + typestr) - def parse_var_def(self, line: str) -> None: varname, datatype, length, dimensions, valuetext = self.parse_def_common(line, "var", False) - value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.sourceref) + value = parse_expr_as_primitive(valuetext, self.cur_block.symbols, self.ppsymbols, self.sourceref) _, value = self.coerce_value(self.sourceref, datatype, value) try: self.cur_block.symbols.define_variable(varname, self.sourceref, datatype, @@ -931,7 +841,7 @@ class Parser: datatype, length, matrix_dimensions = self.get_datatype(args[1]) return varname, datatype, length, matrix_dimensions, valuetext - def parse_statement(self, line: str) -> ParseResult._Stmt: + def parse_statement(self, line: str) -> ParseResult._AstNode: # check if we have a subroutine call using () syntax match = re.match(r"^(?P[\w\.]+)\s*(?P[!]?)\s*\((?P.*)\)\s*$", line) if match: @@ -964,10 +874,10 @@ class Parser: def parse_call_or_go(self, line: str, what: str) -> ParseResult.CallStmt: args = line.split(maxsplit=2) if len(args) == 2: - subname, argumentstr, = args[1], "" + targetstr, argumentstr, = args[1], "" arguments = None elif len(args) == 3: - subname, argumentstr = args[1], args[2] + targetstr, argumentstr = args[1], args[2] arguments = [] for part in argumentstr.split(','): pname, sep, pvalue = part.partition('=') @@ -980,50 +890,80 @@ class Parser: else: raise self.PError("invalid call/go arguments") address = None - if subname[0] == '[' and subname[-1] == ']': + target = None + if targetstr[0] == '[' and targetstr[-1] == ']': # indirect call to address in register pair or memory location - pointerstr = subname[1:-1].strip() - indirect_pointer = pointerstr # type: Union[int, str] - if pointerstr[0] == '#': - _, symbol = self.cur_block.lookup(pointerstr[1:]) - indirect_pointer = self.cur_block.symbols.get_address(pointerstr[1:]) - symboltype = getattr(symbol, "type", None) + indirect = True + targetstr = targetstr[1:-1].strip() + if targetstr[0] == '#': + _, target = self.cur_block.lookup(targetstr[1:]) + assert isinstance(target, SymbolDefinition) + targetstr = targetstr[1:] + symboltype = getattr(target, "type", None) if symboltype and symboltype != DataType.WORD: raise self.PError("invalid call target (should contain 16-bit)") + address = self.cur_block.symbols.get_address(targetstr) else: - # the pointer should be a number or a - _, symbol = self.cur_block.lookup(pointerstr) - if isinstance(symbol, VariableDef): - if symbol.address is not None: + # the pointer should be a number or a label + _, target = self.cur_block.lookup(targetstr) + if isinstance(target, VariableDef): + # checks + if target.address is not None: raise self.PError("invalid call target (should be label or address)") - if symbol.type != DataType.WORD: + if target.type != DataType.WORD: raise self.PError("invalid call target (should be 16-bit address)") + else: + indirect = False + # subname can be a label, or an immediate address (but not #symbol - use subx for that) + if targetstr[0] == '#': + raise self.PError("to call a subroutine, use a subx definition instead") + try: + address = self.parse_integer(targetstr) + targetstr = None + except ValueError: + symblock, target = self.lookup(targetstr) + same_block = symblock and symblock.name == self.cur_block.name + if isinstance(target, LabelDef): + targetstr = target.name if same_block else target.blockname + '.' + target.name + elif isinstance(target, ConstantDef): + if target.type == DataType.WORD: + address = target.value # type: ignore + targetstr = None + else: + raise self.PError("call requires word constant") + elif isinstance(target, VariableDef): + raise self.PError("can only call a constant expression (label, address, const)") # @todo dynamic + elif isinstance(target, SubroutineDef): + # verify subroutine arguments + if arguments is not None and len(arguments) != len(target.parameters): + raise self.PError("invalid number of arguments ({:d}, expected {:d})" + .format(len(arguments), len(target.parameters))) + args_with_pnames = [] + for i, (argname, value) in enumerate(arguments or []): + pname, preg = target.parameters[i] + if argname: + if argname != preg: + raise self.PError("parameter mismatch ({:s}, expected {:s})".format(argname, preg)) + else: + argname = preg + args_with_pnames.append((argname, value)) + arguments = args_with_pnames + else: + raise TypeError("invalid target type") + if isinstance(target, (type(None), SymbolDefinition)): if what == "go": - return ParseResult.CallStmt(self.sourceref.line, is_goto=True, indirect_pointer=indirect_pointer) + return ParseResult.CallStmt(self.sourceref.line, targetstr, target, address=address, + is_goto=True, is_indirect=indirect) elif what == "call": - return ParseResult.CallStmt(self.sourceref.line, indirect_pointer=indirect_pointer) + return ParseResult.CallStmt(self.sourceref.line, targetstr, target, address=address, + arguments=arguments, is_indirect=indirect) elif what == "fcall": - return ParseResult.CallStmt(self.sourceref.line, indirect_pointer=indirect_pointer, preserve_regs=False) + return ParseResult.CallStmt(self.sourceref.line, targetstr, target, address=address, + arguments=arguments, is_indirect=indirect, preserve_regs=False) else: raise ValueError("invalid what") else: - # subname can be a label, or an immediate address (but not #symbol - use subx for that) - if subname[0] == '#': - raise self.PError("to call a subroutine, use a subx definition instead") - else: - try: - address = self.parse_integer(subname) - subname = None - except ValueError: - pass - if what == "go": - return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, is_goto=True) - elif what == "call": - return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, arguments=arguments) - elif what == "fcall": - return ParseResult.CallStmt(self.sourceref.line, address, unresolved=subname, arguments=arguments, preserve_regs=False) - else: - raise ValueError("invalid what") + raise TypeError("target should be a symboldef") def parse_integer(self, text: str) -> int: text = text.strip() @@ -1109,11 +1049,11 @@ class Parser: return ParseResult.InlineAsm(self.sourceref.line, lines) elif aline[0] == "asmbinary": if len(aline) == 4: - offset = parse_expr_as_int(aline[2], None, self.sourceref) - length = parse_expr_as_int(aline[3], None, self.sourceref) + offset = parse_expr_as_int(aline[2], None, None, self.sourceref) + length = parse_expr_as_int(aline[3], None, None, self.sourceref) lines = ['\t.binary "{:s}", ${:04x}, ${:04x}'.format(filename, offset, length)] elif len(aline) == 3: - offset = parse_expr_as_int(aline[2], None, self.sourceref) + offset = parse_expr_as_int(aline[2], None, None, self.sourceref) lines = ['\t.binary "{:s}", ${:04x}'.format(filename, offset)] elif len(aline) == 2: lines = ['\t.binary "{:s}"'.format(filename)] @@ -1123,25 +1063,22 @@ class Parser: else: raise self.PError("invalid statement") - def parse_expression(self, text: str, cur_block: Optional[ParseResult.Block]=None) -> ParseResult.Value: + def parse_expression(self, text: str) -> ParseResult.Value: # parse an expression into whatever it is (primitive value, register, memory, register, etc) - cur_block = cur_block or self.cur_block text = text.strip() if not text: raise self.PError("value expected") if text[0] == '#': # take the pointer (memory address) from the thing that follows this - expression = self.parse_expression(text[1:], cur_block) + expression = self.parse_expression(text[1:]) if isinstance(expression, ParseResult.StringValue): return expression elif isinstance(expression, ParseResult.MemMappedValue): return ParseResult.IntegerValue(expression.address, datatype=DataType.WORD, name=expression.name) - elif isinstance(expression, ParseResult.PlaceholderSymbol): - raise self.PError("cannot take the address of an unknown symbol") else: raise self.PError("cannot take the address of this type") elif text[0] in "-.0123456789$%": - number = parse_expr_as_number(text, None, self.sourceref) + number = parse_expr_as_number(text, self.cur_block.symbols, self.ppsymbols, self.sourceref) try: if type(number) is int: return ParseResult.IntegerValue(int(number)) @@ -1156,7 +1093,7 @@ class Parser: elif text in REGISTER_BYTES: return ParseResult.RegisterValue(text, DataType.BYTE) elif (text.startswith("'") and text.endswith("'")) or (text.startswith('"') and text.endswith('"')): - strvalue = parse_expr_as_string(text, None, self.sourceref) + strvalue = parse_expr_as_string(text, self.cur_block.symbols, self.ppsymbols, self.sourceref) if len(strvalue) == 1: petscii_code = char_to_bytevalue(strvalue) return ParseResult.IntegerValue(petscii_code) @@ -1166,13 +1103,10 @@ class Parser: elif text == "false": return ParseResult.IntegerValue(0) elif self.is_identifier(text): - symblock, sym = cur_block.lookup(text) - if sym is None: - # symbols is not (yet) known, store a placeholder to resolve later in parse pass 2 - return ParseResult.PlaceholderSymbol(None, text) - elif isinstance(sym, (VariableDef, ConstantDef)): + symblock, sym = self.lookup(text) + if isinstance(sym, (VariableDef, ConstantDef)): constant = isinstance(sym, ConstantDef) - if cur_block is symblock: + if self.cur_block is symblock: symbolname = sym.name else: symbolname = "{:s}.{:s}".format(sym.blockname, sym.name) @@ -1204,10 +1138,7 @@ class Parser: float_type = True num_or_name = num_or_name[:-6] if num_or_name.isidentifier(): - try: - sym = cur_block.symbols[num_or_name] # type: ignore - except KeyError: - raise self.PError("unknown symbol (2): " + num_or_name) + _, sym = self.lookup(num_or_name) if isinstance(sym, ConstantDef): if sym.type == DataType.BYTE and (word_type or float_type): raise self.PError("byte value required") @@ -1226,7 +1157,7 @@ class Parser: else: raise self.PError("invalid symbol type used as lvalue of assignment (3)") else: - addr = parse_expr_as_int(num_or_name, None, self.sourceref) + addr = parse_expr_as_int(num_or_name, self.cur_block.symbols, self.ppsymbols, self.sourceref) if word_type: return ParseResult.MemMappedValue(addr, DataType.WORD, length=1) elif float_type: @@ -1244,8 +1175,22 @@ class Parser: return blockname.isidentifier() and name.isidentifier() return False + def lookup(self, dottedname: str) -> Tuple[ParseResult.Block, Union[SymbolDefinition, SymbolTable]]: + symblock, sym = self.cur_block.lookup(dottedname) + if sym is None: + # symbol is not (yet) known in current block, see if the ppsymbols know about it + if '.' not in dottedname: + dottedname = self.cur_block.name + '.' + dottedname + try: + symtable, sym = self.ppsymbols.lookup(dottedname) + assert dottedname.startswith(symtable.name) + symblock = None # the block might not have been parsed yet, so just return this instead + except (LookupError, SymbolError) as x: + raise self.PError(str(x)) + return symblock, sym + def _size_from_arraydecl(self, decl: str) -> int: - return parse_expr_as_int(decl[:-1].split("(")[-1], self.cur_block.symbols, self.sourceref) + return parse_expr_as_int(decl[:-1].split("(")[-1], self.cur_block.symbols, self.ppsymbols, self.sourceref) def _size_from_matrixdecl(self, decl: str) -> Tuple[int, int]: dimensions = decl[:-1].split("(")[-1] @@ -1253,8 +1198,8 @@ class Parser: xs, ys = dimensions.split(",") except ValueError: raise self.PError("invalid matrix dimensions") - return (parse_expr_as_int(xs, self.cur_block.symbols, self.sourceref), - parse_expr_as_int(ys, self.cur_block.symbols, self.sourceref)) + return (parse_expr_as_int(xs, self.cur_block.symbols, self.ppsymbols, self.sourceref), + parse_expr_as_int(ys, self.cur_block.symbols, self.ppsymbols, self.sourceref)) def coerce_value(self, sourceref: SourceRef, datatype: DataType, value: PrimitiveType) -> Tuple[bool, PrimitiveType]: # if we're a BYTE type, and the value is a single character, convert it to the numeric value @@ -1269,6 +1214,16 @@ class Parser: return True, int(value) return False, value + @staticmethod + def to_hex(number: int) -> str: + # 0..255 -> "$00".."$ff" + # 256..65536 -> "$0100".."$ffff" + if 0 <= number < 0x100: + return "${:02x}".format(number) + if number < 0x10000: + return "${:04x}".format(number) + raise OverflowError(number) + def psplit(self, sentence: str, separators: str=" \t", lparen: str="(", rparen: str=")") -> List[str]: """split a sentence but not on separators within parenthesis""" nb_brackets = 0 @@ -1304,20 +1259,9 @@ class Optimizer: self.optimize_multiassigns(block) return self.parsed - def optimize_multiassigns(self, block: ParseResult.Block) -> None: - # optimize multi-assign statements. - for stmt in block.statements: - if isinstance(stmt, ParseResult.AssignmentStmt) and len(stmt.leftvalues) > 1: - # remove duplicates - lvalues = list(set(stmt.leftvalues)) - if len(lvalues) != len(stmt.leftvalues): - print("{:s}:{:d}: removed duplicate assignment targets".format(block.sourceref.file, stmt.lineno)) - # change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) - stmt.leftvalues = list(sorted(lvalues, key=value_sortkey)) - def combine_assignments_into_multi(self, block: ParseResult.Block) -> None: # fold multiple consecutive assignments with the same rvalue into one multi-assignment - statements = [] # type: List[ParseResult._Stmt] + statements = [] # type: List[ParseResult._AstNode] multi_assign_statement = None for stmt in block.statements: if isinstance(stmt, ParseResult.AssignmentStmt): @@ -1337,6 +1281,17 @@ class Optimizer: statements.append(multi_assign_statement) block.statements = statements + def optimize_multiassigns(self, block: ParseResult.Block) -> None: + # optimize multi-assign statements. + for stmt in block.statements: + if isinstance(stmt, ParseResult.AssignmentStmt) and len(stmt.leftvalues) > 1: + # remove duplicates + lvalues = list(set(stmt.leftvalues)) + if len(lvalues) != len(stmt.leftvalues): + print("{:s}:{:d}: removed duplicate assignment targets".format(block.sourceref.file, stmt.lineno)) + # change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) + stmt.leftvalues = list(sorted(lvalues, key=value_sortkey)) + def value_sortkey(value: ParseResult.Value) -> int: if isinstance(value, ParseResult.RegisterValue): diff --git a/il65/preprocess.py b/il65/preprocess.py index 767f7ba2b..a8d301073 100644 --- a/il65/preprocess.py +++ b/il65/preprocess.py @@ -43,7 +43,7 @@ class PreprocessingParser(Parser): def parse_asminclude(self, line: str) -> ParseResult.InlineAsm: return ParseResult.InlineAsm(self.sourceref.line, []) - def parse_statement(self, line: str) -> ParseResult._Stmt: + def parse_statement(self, line: str) -> ParseResult._AstNode: return None # type: ignore def parse_var_def(self, line: str) -> None: diff --git a/il65/symbols.py b/il65/symbols.py index dfe953cea..393d9dc43 100644 --- a/il65/symbols.py +++ b/il65/symbols.py @@ -288,10 +288,10 @@ class SymbolTable: raise SymbolError("can only take address of memory mapped variables") return symbol.address - def as_eval_dict(self) -> Dict[str, Any]: + def as_eval_dict(self, ppsymbols: 'SymbolTable') -> Dict[str, Any]: # return a dictionary suitable to be passed as locals or globals to eval() if self.eval_dict is None: - d = Eval_symbol_dict(self) + d = EvalSymbolDict(self, ppsymbols) self.eval_dict = d # type: ignore return self.eval_dict @@ -438,11 +438,12 @@ class SymbolTable: print() -class Eval_symbol_dict(dict): - def __init__(self, symboltable: SymbolTable, constants: bool=True) -> None: +class EvalSymbolDict(dict): + def __init__(self, symboltable: SymbolTable, ppsymbols: SymbolTable, constants: bool=True) -> None: super().__init__() self._symboltable = symboltable self._constants = constants + self._ppsymbols = ppsymbols def __getattr__(self, name): return self.__getitem__(name) @@ -457,7 +458,13 @@ class Eval_symbol_dict(dict): global_scope = self._symboltable while global_scope.parent: global_scope = global_scope.parent - scope, symbol = global_scope.lookup(name, True) + try: + scope, symbol = global_scope.lookup(name, True) + except (LookupError, SymbolError): + # try the ppsymbols + if self._ppsymbols: + return self._ppsymbols.as_eval_dict(None)[name] + raise SymbolError("undefined symbol '{:s}'".format(name)) from None if self._constants: if isinstance(symbol, ConstantDef): return symbol.value @@ -466,9 +473,11 @@ class Eval_symbol_dict(dict): elif inspect.isbuiltin(symbol): return symbol elif isinstance(symbol, SymbolTable): - return symbol.as_eval_dict() + return symbol.as_eval_dict(self._ppsymbols) + elif isinstance(symbol, LabelDef): + raise SymbolError("can't reference a label here") else: - raise SymbolError("invalid datatype referenced" + repr(symbol)) + raise SymbolError("invalid symbol type referenced " + repr(symbol)) else: raise SymbolError("no support for non-constant expression evaluation yet") diff --git a/tests/test_floats.py b/tests/test_floats.py index 027bfeca7..5ab633a99 100644 --- a/tests/test_floats.py +++ b/tests/test_floats.py @@ -1,45 +1,45 @@ import pytest -from il65 import il65, symbols +from il65 import codegen, symbols def test_float_to_mflpt5(): - mflpt = il65.CodeGenerator.to_mflpt5(1.0) + mflpt = codegen.CodeGenerator.to_mflpt5(1.0) assert type(mflpt) is bytearray - assert b"\x00\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(0) - assert b"\x82\x49\x0F\xDA\xA1" == il65.CodeGenerator.to_mflpt5(3.141592653) - assert b"\x82\x49\x0F\xDA\xA2" == il65.CodeGenerator.to_mflpt5(3.141592653589793) - assert b"\x90\x80\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(-32768) - assert b"\x81\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(1) - assert b"\x80\x35\x04\xF3\x34" == il65.CodeGenerator.to_mflpt5(0.7071067812) - assert b"\x80\x35\x04\xF3\x33" == il65.CodeGenerator.to_mflpt5(0.7071067811865476) - assert b"\x81\x35\x04\xF3\x34" == il65.CodeGenerator.to_mflpt5(1.4142135624) - assert b"\x81\x35\x04\xF3\x33" == il65.CodeGenerator.to_mflpt5(1.4142135623730951) - assert b"\x80\x80\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(-.5) - assert b"\x80\x31\x72\x17\xF8" == il65.CodeGenerator.to_mflpt5(0.69314718061) - assert b"\x80\x31\x72\x17\xF7" == il65.CodeGenerator.to_mflpt5(0.6931471805599453) - assert b"\x84\x20\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(10) - assert b"\x9E\x6E\x6B\x28\x00" == il65.CodeGenerator.to_mflpt5(1000000000) - assert b"\x80\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(.5) - assert b"\x81\x38\xAA\x3B\x29" == il65.CodeGenerator.to_mflpt5(1.4426950408889634) - assert b"\x81\x49\x0F\xDA\xA2" == il65.CodeGenerator.to_mflpt5(1.5707963267948966) - assert b"\x83\x49\x0F\xDA\xA2" == il65.CodeGenerator.to_mflpt5(6.283185307179586) - assert b"\x7F\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(.25) + assert b"\x00\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(0) + assert b"\x82\x49\x0F\xDA\xA1" == codegen.CodeGenerator.to_mflpt5(3.141592653) + assert b"\x82\x49\x0F\xDA\xA2" == codegen.CodeGenerator.to_mflpt5(3.141592653589793) + assert b"\x90\x80\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(-32768) + assert b"\x81\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(1) + assert b"\x80\x35\x04\xF3\x34" == codegen.CodeGenerator.to_mflpt5(0.7071067812) + assert b"\x80\x35\x04\xF3\x33" == codegen.CodeGenerator.to_mflpt5(0.7071067811865476) + assert b"\x81\x35\x04\xF3\x34" == codegen.CodeGenerator.to_mflpt5(1.4142135624) + assert b"\x81\x35\x04\xF3\x33" == codegen.CodeGenerator.to_mflpt5(1.4142135623730951) + assert b"\x80\x80\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(-.5) + assert b"\x80\x31\x72\x17\xF8" == codegen.CodeGenerator.to_mflpt5(0.69314718061) + assert b"\x80\x31\x72\x17\xF7" == codegen.CodeGenerator.to_mflpt5(0.6931471805599453) + assert b"\x84\x20\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(10) + assert b"\x9E\x6E\x6B\x28\x00" == codegen.CodeGenerator.to_mflpt5(1000000000) + assert b"\x80\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(.5) + assert b"\x81\x38\xAA\x3B\x29" == codegen.CodeGenerator.to_mflpt5(1.4426950408889634) + assert b"\x81\x49\x0F\xDA\xA2" == codegen.CodeGenerator.to_mflpt5(1.5707963267948966) + assert b"\x83\x49\x0F\xDA\xA2" == codegen.CodeGenerator.to_mflpt5(6.283185307179586) + assert b"\x7F\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(.25) def test_float_range(): - assert b"\xff\x7f\xff\xff\xff" == il65.CodeGenerator.to_mflpt5(symbols.FLOAT_MAX_POSITIVE) - assert b"\xff\xff\xff\xff\xff" == il65.CodeGenerator.to_mflpt5(symbols.FLOAT_MAX_NEGATIVE) + assert b"\xff\x7f\xff\xff\xff" == codegen.CodeGenerator.to_mflpt5(symbols.FLOAT_MAX_POSITIVE) + assert b"\xff\xff\xff\xff\xff" == codegen.CodeGenerator.to_mflpt5(symbols.FLOAT_MAX_NEGATIVE) with pytest.raises(OverflowError): - il65.CodeGenerator.to_mflpt5(1.7014118346e+38) + codegen.CodeGenerator.to_mflpt5(1.7014118346e+38) with pytest.raises(OverflowError): - il65.CodeGenerator.to_mflpt5(-1.7014118346e+38) + codegen.CodeGenerator.to_mflpt5(-1.7014118346e+38) with pytest.raises(OverflowError): - il65.CodeGenerator.to_mflpt5(1.7014118347e+38) + codegen.CodeGenerator.to_mflpt5(1.7014118347e+38) with pytest.raises(OverflowError): - il65.CodeGenerator.to_mflpt5(-1.7014118347e+38) - assert b"\x03\x39\x1d\x15\x63" == il65.CodeGenerator.to_mflpt5(1.7e-38) - assert b"\x00\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(1.7e-39) - assert b"\x03\xb9\x1d\x15\x63" == il65.CodeGenerator.to_mflpt5(-1.7e-38) - assert b"\x00\x00\x00\x00\x00" == il65.CodeGenerator.to_mflpt5(-1.7e-39) + codegen.CodeGenerator.to_mflpt5(-1.7014118347e+38) + assert b"\x03\x39\x1d\x15\x63" == codegen.CodeGenerator.to_mflpt5(1.7e-38) + assert b"\x00\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(1.7e-39) + assert b"\x03\xb9\x1d\x15\x63" == codegen.CodeGenerator.to_mflpt5(-1.7e-38) + assert b"\x00\x00\x00\x00\x00" == codegen.CodeGenerator.to_mflpt5(-1.7e-39) diff --git a/testsource/calls.ill b/testsource/calls.ill index 4c0854ec2..3300d0447 100644 --- a/testsource/calls.ill +++ b/testsource/calls.ill @@ -10,6 +10,11 @@ var .byte varb1 = 99 memory .byte memb1 = $cff0 + const .word constw = $2355 + const .byte constb = $23 + + subx sub1 () -> () = $ffdd + bar go [AX] ; @todo check indrection jmp (AX) @@ -52,6 +57,13 @@ bar call [$c2dd] call $c000 call $c2 + + + call constw + call sub1 + call main.start + call main.start + } diff --git a/testsource/source1.ill b/testsource/source1.ill index d7bc65bc3..13170282c 100644 --- a/testsource/source1.ill +++ b/testsource/source1.ill @@ -59,7 +59,8 @@ start ;foo ; [646,Y] = [$d020,X] -_loop A ++ +_loop block2.zpw1 ++ + A ++ X ++ Y ++ [$d020] ++ @@ -67,6 +68,7 @@ _loop A ++ X -- Y-- [$d020]-- + [block2.zpw2] = 99 call fidget.subroutine go _loop return 155,2,%00000101 ; will end up in A, X, Y