""" Programming Language for 6502/6510 microprocessors This is the parser of the IL65 code, that generates a parse tree. Written by Irmen de Jong (irmen@razorvine.net) License: GNU GPL 3.0, see LICENSE """ import re import os import sys import shutil import attr from collections import defaultdict from typing import Set, List, Tuple, Optional, Dict, Union, Generator from .exprparse import ParseError, parse_expr_as_int, parse_expr_as_number, parse_expr_as_primitive,\ parse_expr_as_string, parse_arguments, parse_expr_as_comparison from .symbols import * class ProgramFormat(enum.Enum): PRG = "prg" RAW = "raw" class ParseResult: def __init__(self, sourcefile: str) -> None: self.format = ProgramFormat.RAW self.with_sys = False self.sourcefile = sourcefile self.clobberzp = False self.restorezp = False self.start_address = 0 self.blocks = [] # type: List[Block] self.subroutine_usage = defaultdict(set) # type: Dict[Tuple[str, str], Set[str]] self.zeropage = Zeropage() self.preserve_registers = False def all_blocks(self) -> Generator[Block, None, None]: for block in self.blocks: yield block for sub in block.symbols.iter_subroutines(True): yield sub.sub_block def add_block(self, block: Block, position: Optional[int]=None) -> None: if position is not None: self.blocks.insert(position, block) else: self.blocks.append(block) def merge(self, parsed: 'ParseResult') -> None: existing_blocknames = set(block.name for block in self.blocks) other_blocknames = set(block.name for block in parsed.blocks) overlap = existing_blocknames & other_blocknames if overlap != {"
"}: raise SymbolError("double block names: {}".format(overlap)) for block in parsed.blocks: if block.name != "
": self.blocks.append(block) def find_block(self, name: str) -> Block: for block in self.blocks: if block.name == name: return block raise KeyError("block not found: " + name) def sub_used_by(self, sub: SubroutineDef, sourceref: SourceRef) -> None: self.subroutine_usage[(sub.blockname, sub.name)].add(str(sourceref)) class Parser: def __init__(self, filename: str, outputdir: str, existing_imports: Set[str], parsing_import: bool = False, sourcelines: List[Tuple[int, str]] = None, ppsymbols: SymbolTable = None, sub_usage: Dict=None) -> None: self.result = ParseResult(filename) if sub_usage is not None: # re-use the (global) subroutine usage tracking self.result.subroutine_usage = sub_usage self.sourceref = SourceRef(filename, -1, 0) if sourcelines: self.lines = sourcelines else: self.lines = self.load_source(filename) self.outputdir = outputdir self.parsing_import = parsing_import # are we parsing a import file? self._cur_lineidx = -1 # used to efficiently go to next/previous line in source self.cur_block = None # type: Block self.root_scope = SymbolTable("", None, None) self.root_scope.set_zeropage(self.result.zeropage) self.ppsymbols = ppsymbols # symboltable from preprocess phase self.print_block_parsing = True self.existing_imports = existing_imports self.parse_errors = 0 def load_source(self, filename: str) -> List[Tuple[int, str]]: with open(filename, "rU") as source: sourcelines = source.readlines() # store all lines that aren't empty # comments are kept (end-of-line comments are stripped though) lines = [] for num, line in enumerate(sourcelines, start=1): line = line.rstrip() if line.lstrip().startswith(';'): lines.append((num, line.lstrip())) else: line2, sep, comment = line.rpartition(';') if sep: line = line2.rstrip() if line: lines.append((num, line)) return lines def parse(self) -> Optional[ParseResult]: # start the parsing try: result = self.parse_file() except ParseError as x: self.handle_parse_error(x) except Exception as x: if sys.stderr.isatty(): print("\x1b[1m", file=sys.stderr) print("\nERROR: internal parser error: ", x, file=sys.stderr) if self.cur_block: print(" file:", self.sourceref.file, "block:", self.cur_block.name, "line:", self.sourceref.line, file=sys.stderr) else: print(" file:", self.sourceref.file, file=sys.stderr) if sys.stderr.isatty(): print("\x1b[0m", file=sys.stderr, end="", flush=True) raise if self.parse_errors: self.print_bold("\nNo output; there were {:d} errors in file {:s}\n".format(self.parse_errors, self.sourceref.file)) raise SystemExit(1) return result def handle_parse_error(self, exc: ParseError) -> None: self.parse_errors += 1 if sys.stderr.isatty(): print("\x1b[1m", file=sys.stderr) if exc.sourcetext: print("\t" + exc.sourcetext, file=sys.stderr) if exc.sourceref.column: print("\t" + ' ' * exc.sourceref.column + ' ^', file=sys.stderr) if self.parsing_import: print("Error (in imported file):", str(exc), file=sys.stderr) else: print("Error:", str(exc), file=sys.stderr) if sys.stderr.isatty(): print("\x1b[0m", file=sys.stderr, end="", flush=True) def parse_file(self) -> ParseResult: print("\nparsing", self.sourceref.file) self._parse_1() self._parse_import_file("il65lib") # compiler support library is always imported. self._parse_2() return self.result def print_warning(self, text: str, sourceref: SourceRef=None) -> None: self.print_bold("warning: {}: {:s}".format(sourceref or self.sourceref, text)) def print_bold(self, text: str) -> None: if sys.stdout.isatty(): print("\x1b[1m" + text + "\x1b[0m", flush=True) else: print(text) def _parse_comments(self) -> None: while True: line = self.next_line().lstrip() if line.startswith(';'): self.cur_block.statements.append(Comment(line, self.sourceref)) continue self.prev_line() break def _parse_1(self) -> None: self.cur_block = Block("
", self.sourceref, self.root_scope, self.result.preserve_registers) self.result.add_block(self.cur_block) self.parse_header() if not self.parsing_import: self.result.zeropage.configure(self.result.clobberzp) while True: self._parse_comments() next_line = self.peek_next_line().lstrip() if next_line.startswith("~"): block = self.parse_block() if block: self.result.add_block(block) elif next_line.startswith(("%import ", "%import\t")): self.parse_import() else: break line = self.next_line() if line: raise self.PError("invalid statement or characters, block expected") if not self.parsing_import: # check if we have a proper main block to contain the program's entry point main_found = False for block in self.result.blocks: if block.name == "main": main_found = True if "start" not in block.label_names: self.sourceref.line = block.sourceref.line self.sourceref.column = 0 raise self.PError("block 'main' should contain the program entry point 'start'") self._check_return_statement(block, "'main' block") for sub in block.symbols.iter_subroutines(True): self._check_return_statement(sub.sub_block, "subroutine '{:s}'".format(sub.name)) if not main_found: raise self.PError("a block 'main' should be defined and contain the program's entry point label 'start'") def _check_return_statement(self, block: Block, message: str) -> None: # find last statement that isn't a comment for stmt in reversed(block.statements): if isinstance(stmt, Comment): continue if isinstance(stmt, ReturnStmt) or isinstance(stmt, CallStmt) and stmt.is_goto: return if isinstance(stmt, InlineAsm): # check that the last asm line is a jmp or a rts for asmline in reversed(stmt.asmlines): if asmline.strip().replace(' ', '').startswith(";returns"): return if asmline.lstrip().startswith(';'): continue if " rts" in asmline or "\trts" in asmline or " jmp" in asmline or "\tjmp" in asmline: return if asmline.strip(): if asmline.split()[0].isidentifier(): continue break break self.print_warning("{:s} doesn't end with a return statement".format(message), block.sourceref) _immediate_floats = {} # type: Dict[float, Tuple[str, str]] _immediate_string_vars = {} # type: Dict[str, Tuple[str, str]] def _parse_2(self) -> None: # parsing pass 2 (not done during preprocessing!) self.cur_block = None self.sourceref = SourceRef(self.sourceref.file, -1) def imm_string_to_var(stmt: AssignmentStmt, containing_block: Block) -> None: if stmt.right.name or not isinstance(stmt.right, StringValue): return if stmt.right.value in self._immediate_string_vars: blockname, stringvar_name = self._immediate_string_vars[stmt.right.value] if blockname: stmt.right.name = blockname + '.' + stringvar_name else: stmt.right.name = stringvar_name else: stringvar_name = "il65_str_{:d}".format(id(stmt)) value = stmt.right.value containing_block.symbols.define_constant(stringvar_name, stmt.sourceref, DataType.STRING, value=value) stmt.right.name = stringvar_name self._immediate_string_vars[stmt.right.value] = (containing_block.name, stringvar_name) def desugar_immediate_strings(stmt: AstNode, containing_block: Block) -> None: if isinstance(stmt, CallStmt): for s in stmt.desugared_call_arguments: self.sourceref = s.sourceref imm_string_to_var(s, containing_block) for s in stmt.desugared_output_assignments: self.sourceref = s.sourceref imm_string_to_var(s, containing_block) if isinstance(stmt, AssignmentStmt): self.sourceref = stmt.sourceref imm_string_to_var(stmt, containing_block) def desugar_immediate_floats(stmt: AstNode, containing_block: Block) -> None: if isinstance(stmt, (InplaceIncrStmt, InplaceDecrStmt)): howmuch = stmt.value.value if howmuch is None: assert stmt.value.name return if howmuch in (0, 1) or type(howmuch) is int: return # 1 is special cased in the code generator rom_floats = { 1: "c64.FL_FONE", .25: "c64.FL_FR4", .5: "c64.FL_FHALF", -.5: "c64.FL_NEGHLF", 10: "c64.FL_TENC", -32768: "c64.FL_N32768", 1e9: "c64.FL_NZMIL", math.pi: "c64.FL_PIVAL", math.pi / 2: "c64.FL_PIHALF", math.pi * 2: "c64.FL_TWOPI", math.sqrt(2)/2.0: "c64.FL_SQRHLF", math.sqrt(2): "c64.FL_SQRTWO", math.log(2): "c64.FL_LOG2", 1.0 / math.log(2): "c64.FL_LOGEB2", } for fv, name in rom_floats.items(): if math.isclose(howmuch, fv, rel_tol=0, abs_tol=1e-9): # use one of the constants available in ROM stmt.value.name = name return if howmuch in self._immediate_floats: # reuse previously defined float constant blockname, floatvar_name = self._immediate_floats[howmuch] if blockname: stmt.value.name = blockname + '.' + floatvar_name else: stmt.value.name = floatvar_name else: # define new float variable to hold the incr/decr value # note: not a constant, because we need the MFLT bytes floatvar_name = "il65_float_{:d}".format(id(stmt)) containing_block.symbols.define_variable(floatvar_name, stmt.sourceref, DataType.FLOAT, value=howmuch) self._immediate_floats[howmuch] = (containing_block.name, floatvar_name) stmt.value.name = floatvar_name for block in self.result.blocks: self.cur_block = block self.sourceref = attr.evolve(block.sourceref, column=0) for _, sub, stmt in block.all_statements(): if isinstance(stmt, CallStmt): self.sourceref = stmt.sourceref self.desugar_call_arguments_and_outputs(stmt) desugar_immediate_strings(stmt, self.cur_block) desugar_immediate_floats(stmt, self.cur_block) def desugar_call_arguments_and_outputs(self, stmt: CallStmt) -> None: stmt.desugared_call_arguments.clear() stmt.desugared_output_assignments.clear() for name, value in stmt.arguments or []: assert name is not None, "all call arguments should have a name or be matched on a named parameter" assignment = self.parse_assignment(name, value) assignment.sourceref = stmt.sourceref if assignment.leftvalues[0].datatype != DataType.BYTE: if isinstance(assignment.right, IntegerValue) and assignment.right.constant: # a call that doesn't expect a BYTE argument but gets one, converted from a 1-byte string most likely if value.startswith("'") and value.endswith("'"): self.print_warning("possible problematic string to byte conversion (use a .text var instead?)") if not assignment.is_identity(): stmt.desugared_call_arguments.append(assignment) if all(not isinstance(v, RegisterValue) for r, v in stmt.outputvars or []): # if none of the output variables are registers, we can simply generate the assignments without issues for register, value in stmt.outputvars or []: rvalue = self.parse_expression(register) assignment = AssignmentStmt([value], rvalue, stmt.sourceref) stmt.desugared_output_assignments.append(assignment) else: result_reg_mapping = [(register, value.register, value) for register, value in stmt.outputvars or [] if isinstance(value, RegisterValue)] if any(r[0] != r[1] for r in result_reg_mapping): # not all result parameter registers line up with the correct order of registers in the statement, # reshuffling call results is not supported yet. raise self.PError("result registers and/or their ordering is not the same as in the " "subroutine definition, this isn't supported yet") else: # no register alignment issues, just generate the assignments # note: do not remove the identity assignment here or the output register handling generates buggy code for register, value in stmt.outputvars or []: rvalue = self.parse_expression(register) assignment = AssignmentStmt([value], rvalue, stmt.sourceref) stmt.desugared_output_assignments.append(assignment) def next_line(self) -> str: self._cur_lineidx += 1 try: lineno, line = self.lines[self._cur_lineidx] self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) return line except IndexError: return "" def prev_line(self) -> str: self._cur_lineidx -= 1 lineno, line = self.lines[self._cur_lineidx] self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) return line def peek_next_line(self) -> str: if (self._cur_lineidx + 1) < len(self.lines): return self.lines[self._cur_lineidx + 1][1] return "" def PError(self, message: str, lineno: int=0, column: int=0) -> ParseError: sourceline = "" lineno = lineno or self.sourceref.line column = column or self.sourceref.column for num, text in self.lines: if num == lineno: sourceline = text.strip() break return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column)) def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: if typestr == ".byte": return DataType.BYTE, 1, None elif typestr == ".word": return DataType.WORD, 1, None elif typestr == ".float": return DataType.FLOAT, 1, None elif typestr.endswith("text"): if typestr == ".text": return DataType.STRING, 0, None elif typestr == ".ptext": return DataType.STRING_P, 0, None elif typestr == ".stext": return DataType.STRING_S, 0, None elif typestr == ".pstext": return DataType.STRING_PS, 0, None elif typestr.startswith(".array(") and typestr.endswith(")"): return DataType.BYTEARRAY, self._size_from_arraydecl(typestr), None elif typestr.startswith(".wordarray(") and typestr.endswith(")"): return DataType.WORDARRAY, self._size_from_arraydecl(typestr), None elif typestr.startswith(".matrix(") and typestr.endswith(")"): dimensions = self._size_from_matrixdecl(typestr) return DataType.MATRIX, dimensions[0] * dimensions[1], dimensions raise self.PError("invalid data type: " + typestr) def parse_header(self) -> None: self.result.with_sys = False self.result.format = ProgramFormat.RAW output_specified = False zp_specified = False preserve_specified = False while True: self._parse_comments() line = self.next_line() if line.startswith('%'): directive = line.split(maxsplit=1)[0][1:] if directive == "output": if output_specified: raise self.PError("can only specify output options once") output_specified = True _, _, optionstr = line.partition(" ") options = set(optionstr.replace(' ', '').split(',')) self.result.with_sys = False self.result.format = ProgramFormat.RAW if "raw" in options: options.remove("raw") if "prg" in options: options.remove("prg") self.result.format = ProgramFormat.PRG if "basic" in options: options.remove("basic") if self.result.format == ProgramFormat.PRG: self.result.with_sys = True else: raise self.PError("can only use basic output option with prg, not raw") if options: raise self.PError("invalid output option(s): " + str(options)) continue elif directive == "zp": if zp_specified: raise self.PError("can only specify ZP options once") zp_specified = True _, _, optionstr = line.partition(" ") options = set(optionstr.replace(' ', '').split(',')) self.result.clobberzp = False self.result.restorezp = False if "clobber" in options: options.remove("clobber") self.result.clobberzp = True if "restore" in options: options.remove("restore") if self.result.clobberzp: self.result.restorezp = True else: raise self.PError("can only use restore zp option if clobber zp is used as well") if options: raise self.PError("invalid zp option(s): " + str(options)) continue elif directive == "address": if self.result.start_address: raise self.PError("multiple occurrences of 'address'") _, _, arg = line.partition(" ") try: self.result.start_address = parse_expr_as_int(arg, None, None, self.sourceref) except ParseError: raise self.PError("invalid address") if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") continue elif directive == "saveregisters": if preserve_specified: raise self.PError("can only specify saveregisters option once") preserve_specified = True _, _, optionstr = line.partition(" ") self.result.preserve_registers = optionstr in ("", "true", "yes") continue elif directive == "import": break # the first import directive actually is not part of the header anymore else: raise self.PError("invalid directive") break # no more directives, header parsing finished! self.prev_line() if not self.result.start_address: # set the proper default start address if self.result.format == ProgramFormat.PRG: self.result.start_address = 0x0801 # normal C-64 basic program start address elif self.result.format == ProgramFormat.RAW: self.result.start_address = 0xc000 # default start for raw assembly if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") def parse_import(self) -> None: line = self.next_line() line = line.lstrip() if not line.startswith(("%import ", "%import\t")): raise self.PError("expected import") try: _, filename = line.split(maxsplit=1) except ValueError: raise self.PError("invalid import statement") if filename[0] in "'\"" and filename[-1] in "'\"": filename = filename[1:-1] if not filename: raise self.PError("invalid filename") self._parse_import_file(filename) def _parse_import_file(self, filename: str) -> None: filename_at_source_location = os.path.join(os.path.split(self.sourceref.file)[0], filename) filename_at_libs_location = os.path.join(os.getcwd(), "lib", filename) candidates = [filename, filename_at_source_location, filename_at_libs_location, filename+".ill", filename_at_source_location+".ill", filename_at_libs_location+".ill"] for filename in candidates: if os.path.isfile(filename): if not self.check_import_okay(filename): return self.print_import_progress("importing", filename) parser = self.create_import_parser(filename, self.outputdir) result = parser.parse() self.print_import_progress("\ncontinuing", self.sourceref.file) if result: # merge the symbol table of the imported file into our own try: self.root_scope.merge_roots(parser.root_scope) self.result.merge(result) except SymbolError as x: raise self.PError(str(x)) return else: raise self.PError("Error while parsing imported file") raise self.PError("imported file not found") def print_import_progress(self, message: str, *args: str) -> None: print(message, *args) def create_import_parser(self, filename: str, outputdir: str) -> 'Parser': return Parser(filename, outputdir, self.existing_imports, True, ppsymbols=self.ppsymbols, sub_usage=self.result.subroutine_usage) def parse_block(self) -> Optional[Block]: # first line contains block header "~ [name] [addr]" followed by a '{' self._parse_comments() line = self.next_line() line = line.lstrip() if not line.startswith("~"): raise self.PError("expected '~' (block)") block_args = line[1:].split() arg = "" self.cur_block = Block("", self.sourceref, self.root_scope, self.result.preserve_registers) is_zp_block = False while block_args: arg = block_args.pop(0) if arg.isidentifier(): if arg.lower() == "zeropage" or arg in ("zp", "zP", "Zp"): raise self.PError("zero page block should be named 'ZP'") is_zp_block = arg == "ZP" if arg in set(b.name for b in self.result.blocks): orig = [b for b in self.result.blocks if b.name == arg][0] if not is_zp_block: raise self.PError("duplicate block name '{:s}', original definition at {}".format(arg, orig.sourceref)) self.cur_block = orig # zero page block occurrences are merged else: self.cur_block = Block(arg, self.sourceref, self.root_scope, self.result.preserve_registers) try: self.root_scope.define_scope(self.cur_block.symbols, self.cur_block.sourceref) except SymbolError as x: raise self.PError(str(x)) elif arg == "{": break elif arg.endswith("{"): # when there is no whitespace before the { block_args.insert(0, "{") block_args.insert(0, arg[:-1]) continue else: try: block_address = parse_expr_as_int(arg, self.cur_block.symbols, self.ppsymbols, self.sourceref) except ParseError: raise self.PError("Invalid block address") if block_address == 0 or (block_address < 0x0200 and not is_zp_block): raise self.PError("block address must be >= $0200 (or omitted)") if is_zp_block: if block_address not in (0, 0x04): raise self.PError("zero page block address must be $04 (or omittted)") block_address = 0x04 self.cur_block.address = block_address if arg != "{": line = self.peek_next_line() if line != "{": raise self.PError("expected '{' after block") else: self.next_line() if self.print_block_parsing: if self.cur_block.address: print(" parsing block '{:s}' at ${:04x}".format(self.cur_block.name, self.cur_block.address)) else: print(" parsing block '{:s}'".format(self.cur_block.name)) if self.cur_block.ignore: # just skip the lines until we hit a '}' that closes the block nesting_level = 1 while True: line = self.next_line().strip() if line.endswith("{"): nesting_level += 1 elif line == "}": nesting_level -= 1 if nesting_level == 0: self.print_warning("ignoring block without name and address", self.cur_block.sourceref) return None else: raise self.PError("invalid statement in block") while True: try: go_on, resultblock = self._parse_block_statement(is_zp_block) if not go_on: return resultblock except ParseError as x: self.handle_parse_error(x) def _parse_block_statement(self, is_zp_block: bool) -> Tuple[bool, Optional[Block]]: # parse the statements inside a block self._parse_comments() line = self.next_line() unstripped_line = line line = line.strip() if line.startswith('%'): directive, _, optionstr = line.partition(" ") directive = directive[1:] self.cur_block.preserve_registers = optionstr in ("", "true", "yes") if directive in ("asminclude", "asmbinary"): if is_zp_block: raise self.PError("ZP block cannot contain assembler directives") self.cur_block.statements.append(self.parse_asminclude(line)) elif directive == "asm": if is_zp_block: raise self.PError("ZP block cannot contain code statements") self.prev_line() self.cur_block.statements.append(self.parse_asm()) elif directive == "breakpoint": self.cur_block.statements.append(BreakpointStmt(self.sourceref)) self.print_warning("breakpoint defined") elif directive == "saveregisters": self.result.preserve_registers = optionstr in ("", "true", "yes") else: raise self.PError("invalid directive") elif line == "}": if is_zp_block and any(b.name == "ZP" for b in self.result.blocks): return False, None # we already have the ZP block if self.cur_block.ignore: self.print_warning("ignoring block without name and address", self.cur_block.sourceref) return False, None return False, self.cur_block elif line.startswith(("var ", "var\t")): self.parse_var_def(line) elif line.startswith(("const ", "const\t")): self.parse_const_def(line) elif line.startswith(("memory ", "memory\t")): self.parse_memory_def(line, is_zp_block) elif line.startswith(("sub ", "sub\t")): if is_zp_block: raise self.PError("ZP block cannot contain subroutines") self.parse_subroutine_def(line) elif unstripped_line.startswith((" ", "\t")): if line.endswith("{"): raise self.PError("invalid statement") if is_zp_block: raise self.PError("ZP block cannot contain code statements") self.cur_block.statements.append(self.parse_statement(line)) elif line: match = re.fullmatch(r"(?P