""" Programming Language for 6502/6510 microprocessors This is the hand-written parser of the IL65 code, that generates a parse tree. Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ import re import os import sys import shutil from collections import defaultdict from .exprparse import ParseError, parse_expr_as_int, parse_expr_as_number, parse_expr_as_primitive,\ parse_expr_as_string, parse_arguments, parse_expr_as_comparison from .symbols import * class ProgramFormat(enum.Enum): PRG = "prg" RAW = "raw" class ParseResult: def __init__(self, sourcefile: str) -> None: self.format = ProgramFormat.RAW self.with_sys = False self.sourcefile = sourcefile self.clobberzp = False self.restorezp = False self.start_address = 0 self.blocks = [] # type: List[Block] self.subroutine_usage = defaultdict(set) # type: Dict[Tuple[str, str], Set[str]] self.zeropage = Zeropage() self.preserve_registers = False def all_blocks(self) -> Generator[Block, None, None]: for block in self.blocks: yield block for sub in block.symbols.iter_subroutines(True): yield sub.sub_block def add_block(self, block: Block, position: Optional[int]=None) -> None: if position is not None: self.blocks.insert(position, block) else: self.blocks.append(block) def merge(self, parsed: 'ParseResult') -> None: existing_blocknames = set(block.name for block in self.blocks) other_blocknames = set(block.name for block in parsed.blocks) overlap = existing_blocknames & other_blocknames if overlap != {"
"}: raise SymbolError("double block names: {}".format(overlap)) for block in parsed.blocks: if block.name != "
": self.blocks.append(block) def find_block(self, name: str) -> Block: for block in self.blocks: if block.name == name: return block raise KeyError("block not found: " + name) def sub_used_by(self, sub: SubroutineDef, sourceref: SourceRef) -> None: self.subroutine_usage[(sub.blockname, sub.name)].add(str(sourceref)) class Parser: def __init__(self, filename: str, outputdir: str, existing_imports: Set[str], parsing_import: bool = False, sourcelines: List[Tuple[int, str]] = None, ppsymbols: SymbolTable = None, sub_usage: Dict=None) -> None: self.result = ParseResult(filename) if sub_usage is not None: # re-use the (global) subroutine usage tracking self.result.subroutine_usage = sub_usage self.sourceref = SourceRef(filename, -1, 0) # type: ignore if sourcelines: self.lines = sourcelines else: self.lines = self.load_source(filename) self.outputdir = outputdir self.parsing_import = parsing_import # are we parsing a import file? self._cur_lineidx = -1 # used to efficiently go to next/previous line in source self.cur_block = None # type: Block self.root_scope = SymbolTable("", None, None) self.root_scope.set_zeropage(self.result.zeropage) self.ppsymbols = ppsymbols # symboltable from preprocess phase self.print_block_parsing = True self.existing_imports = existing_imports self.parse_errors = 0 def load_source(self, filename: str) -> List[Tuple[int, str]]: with open(filename, "rU") as source: sourcelines = source.readlines() # store all lines that aren't empty # comments are kept (end-of-line comments are stripped though) lines = [] for num, line in enumerate(sourcelines, start=1): line = line.rstrip() if line.lstrip().startswith(';'): lines.append((num, line.lstrip())) else: line2, sep, comment = line.rpartition(';') if sep: line = line2.rstrip() if line: lines.append((num, line)) return lines def parse(self) -> Optional[ParseResult]: # start the parsing try: result = self.parse_file() except ParseError as x: self.handle_parse_error(x) except Exception as x: if sys.stderr.isatty(): print("\x1b[1m", file=sys.stderr) print("\nERROR: internal parser error: ", x, file=sys.stderr) if self.cur_block: print(" file:", self.sourceref.file, "block:", self.cur_block.name, "line:", self.sourceref.line, file=sys.stderr) else: print(" file:", self.sourceref.file, file=sys.stderr) if sys.stderr.isatty(): print("\x1b[0m", file=sys.stderr, end="", flush=True) raise if self.parse_errors: self.print_bold("\nNo output; there were {:d} errors in file {:s}\n".format(self.parse_errors, self.sourceref.file)) raise SystemExit(1) return result def handle_parse_error(self, exc: ParseError) -> None: self.parse_errors += 1 if sys.stderr.isatty(): print("\x1b[1m", file=sys.stderr) if exc.sourcetext: print("\t" + exc.sourcetext, file=sys.stderr) if exc.sourceref.column: print("\t" + ' ' * exc.sourceref.column + ' ^', file=sys.stderr) if self.parsing_import: print("Error (in imported file):", str(exc), file=sys.stderr) else: print("Error:", str(exc), file=sys.stderr) if sys.stderr.isatty(): print("\x1b[0m", file=sys.stderr, end="", flush=True) def parse_file(self) -> ParseResult: print("\nparsing", self.sourceref.file) self._parse_1() self._parse_import_file("il65lib") # compiler support library is always imported. self._parse_2() return self.result def print_warning(self, text: str, sourceref: SourceRef=None) -> None: self.print_bold("warning: {}: {:s}".format(sourceref or self.sourceref, text)) def print_bold(self, text: str) -> None: if sys.stdout.isatty(): print("\x1b[1m" + text + "\x1b[0m", flush=True) else: print(text) def _parse_comments(self) -> None: while True: line = self.next_line().lstrip() if line.startswith(';'): self.cur_block.statements.append(Comment(line, self.sourceref)) continue self.prev_line() break def _parse_1(self) -> None: self.cur_block = Block("
", self.sourceref, self.root_scope, self.result.preserve_registers) self.result.add_block(self.cur_block) self.parse_header() if not self.parsing_import: self.result.zeropage.configure(self.result.clobberzp) while True: self._parse_comments() next_line = self.peek_next_line().lstrip() if next_line.startswith("~"): block = self.parse_block() if block: self.result.add_block(block) elif next_line.startswith(("%import ", "%import\t")): self.parse_import() else: break line = self.next_line() if line: raise self.PError("invalid statement or characters, block expected") if not self.parsing_import: # check if we have a proper main block to contain the program's entry point main_found = False for block in self.result.blocks: if block.name == "main": main_found = True if "start" not in block.label_names: self.sourceref.line = block.sourceref.line self.sourceref.column = 0 raise self.PError("block 'main' should contain the program entry point 'start'") self._check_return_statement(block, "'main' block") for sub in block.symbols.iter_subroutines(True): self._check_return_statement(sub.sub_block, "subroutine '{:s}'".format(sub.name)) if not main_found: raise self.PError("a block 'main' must be defined and contain the program's entry point label 'start'") def _check_return_statement(self, block: Block, message: str) -> None: # find last statement that isn't a comment for stmt in reversed(block.statements): if isinstance(stmt, Comment): continue if isinstance(stmt, ReturnStmt) or isinstance(stmt, CallStmt) and stmt.is_goto: return if isinstance(stmt, InlineAsm): # check that the last asm line is a jmp or a rts for asmline in reversed(stmt.asmlines): if asmline.strip().replace(' ', '').startswith(";returns"): return if asmline.lstrip().startswith(';'): continue if " rts" in asmline or "\trts" in asmline or " jmp" in asmline or "\tjmp" in asmline: return if asmline.strip(): if asmline.split()[0].isidentifier(): continue break break self.print_warning("{:s} doesn't end with a return statement".format(message), block.sourceref) _immediate_floats = {} # type: Dict[float, Tuple[str, str]] _immediate_string_vars = {} # type: Dict[str, Tuple[str, str]] def _parse_2(self) -> None: # parsing pass 2 (not done during preprocessing!) self.cur_block = None self.sourceref = SourceRef(self.sourceref.file, -1) # type: ignore def imm_string_to_var(stmt: AssignmentStmt, containing_block: Block) -> None: if stmt.right.name or not isinstance(stmt.right, StringValue): return if stmt.right.value in self._immediate_string_vars: blockname, stringvar_name = self._immediate_string_vars[stmt.right.value] if blockname: stmt.right.name = blockname + '.' + stringvar_name else: stmt.right.name = stringvar_name else: stringvar_name = "il65_str_{:d}".format(id(stmt)) value = stmt.right.value containing_block.symbols.define_constant(stringvar_name, stmt.sourceref, DataType.STRING, value=value) stmt.right.name = stringvar_name self._immediate_string_vars[stmt.right.value] = (containing_block.name, stringvar_name) def desugar_immediate_strings(stmt: AstNode, containing_block: Block) -> None: if isinstance(stmt, CallStmt): for s in stmt.desugared_call_arguments: self.sourceref = s.sourceref imm_string_to_var(s, containing_block) for s in stmt.desugared_output_assignments: self.sourceref = s.sourceref imm_string_to_var(s, containing_block) if isinstance(stmt, AssignmentStmt): self.sourceref = stmt.sourceref imm_string_to_var(stmt, containing_block) def desugar_immediate_floats(stmt: AstNode, containing_block: Block) -> None: if isinstance(stmt, (InplaceIncrStmt, InplaceDecrStmt)): howmuch = stmt.value.value if howmuch is None: assert stmt.value.name return if howmuch in (0, 1) or type(howmuch) is int: return # 1 is special cased in the code generator rom_floats = { 1: "c64.FL_FONE", .25: "c64.FL_FR4", .5: "c64.FL_FHALF", -.5: "c64.FL_NEGHLF", 10: "c64.FL_TENC", -32768: "c64.FL_N32768", 1e9: "c64.FL_NZMIL", math.pi: "c64.FL_PIVAL", math.pi / 2: "c64.FL_PIHALF", math.pi * 2: "c64.FL_TWOPI", math.sqrt(2)/2.0: "c64.FL_SQRHLF", math.sqrt(2): "c64.FL_SQRTWO", math.log(2): "c64.FL_LOG2", 1.0 / math.log(2): "c64.FL_LOGEB2", } for fv, name in rom_floats.items(): if math.isclose(howmuch, fv, rel_tol=0, abs_tol=1e-9): # use one of the constants available in ROM stmt.value.name = name return if howmuch in self._immediate_floats: # reuse previously defined float constant blockname, floatvar_name = self._immediate_floats[howmuch] if blockname: stmt.value.name = blockname + '.' + floatvar_name else: stmt.value.name = floatvar_name else: # define new float variable to hold the incr/decr value # note: not a constant, because we need the MFLT bytes floatvar_name = "il65_float_{:d}".format(id(stmt)) containing_block.symbols.define_variable(floatvar_name, stmt.sourceref, DataType.FLOAT, value=howmuch) self._immediate_floats[howmuch] = (containing_block.name, floatvar_name) stmt.value.name = floatvar_name for block in self.result.blocks: self.cur_block = block self.sourceref = attr.evolve(block.sourceref, column=0) for _, sub, stmt in block.all_statements(): if isinstance(stmt, CallStmt): self.sourceref = stmt.sourceref self.desugar_call_arguments_and_outputs(stmt) desugar_immediate_strings(stmt, self.cur_block) desugar_immediate_floats(stmt, self.cur_block) def desugar_call_arguments_and_outputs(self, stmt: CallStmt) -> None: stmt.desugared_call_arguments.clear() stmt.desugared_output_assignments.clear() for name, value in stmt.arguments or []: assert name is not None, "all call arguments should have a name or be matched on a named parameter" assignment = self.parse_assignment(name, value) assignment.sourceref = stmt.sourceref if assignment.leftvalues[0].datatype != DataType.BYTE: if isinstance(assignment.right, IntegerValue) and assignment.right.constant: # a call that doesn't expect a BYTE argument but gets one, converted from a 1-byte string most likely if value.startswith("'") and value.endswith("'"): self.print_warning("possible problematic string to byte conversion (use a .text var instead?)") if not assignment.is_identity(): stmt.desugared_call_arguments.append(assignment) if all(not isinstance(v, RegisterValue) for r, v in stmt.outputvars or []): # if none of the output variables are registers, we can simply generate the assignments without issues for register, value in stmt.outputvars or []: rvalue = self.parse_expression(register) assignment = AssignmentStmt([value], rvalue, stmt.sourceref) stmt.desugared_output_assignments.append(assignment) else: result_reg_mapping = [(register, value.register, value) for register, value in stmt.outputvars or [] if isinstance(value, RegisterValue)] if any(r[0] != r[1] for r in result_reg_mapping): # not all result parameter registers line up with the correct order of registers in the statement, # reshuffling call results is not supported yet. raise self.PError("result registers and/or their ordering is not the same as in the " "subroutine definition, this isn't supported yet") else: # no register alignment issues, just generate the assignments # note: do not remove the identity assignment here or the output register handling generates buggy code for register, value in stmt.outputvars or []: rvalue = self.parse_expression(register) assignment = AssignmentStmt([value], rvalue, stmt.sourceref) stmt.desugared_output_assignments.append(assignment) def next_line(self) -> str: self._cur_lineidx += 1 try: lineno, line = self.lines[self._cur_lineidx] self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) # type: ignore return line except IndexError: return "" def prev_line(self) -> str: self._cur_lineidx -= 1 lineno, line = self.lines[self._cur_lineidx] self.sourceref = SourceRef(file=self.sourceref.file, line=lineno) # type: ignore return line def peek_next_line(self) -> str: if (self._cur_lineidx + 1) < len(self.lines): return self.lines[self._cur_lineidx + 1][1] return "" def PError(self, message: str, lineno: int=0, column: int=0) -> ParseError: sourceline = "" lineno = lineno or self.sourceref.line column = column or self.sourceref.column for num, text in self.lines: if num == lineno: sourceline = text.strip() break return ParseError(message, sourceline, SourceRef(self.sourceref.file, lineno, column)) # type: ignore def get_datatype(self, typestr: str) -> Tuple[DataType, int, Optional[Tuple[int, int]]]: if typestr == ".byte": return DataType.BYTE, 1, None elif typestr == ".word": return DataType.WORD, 1, None elif typestr == ".float": return DataType.FLOAT, 1, None elif typestr.endswith("text"): if typestr == ".text": return DataType.STRING, 0, None elif typestr == ".ptext": return DataType.STRING_P, 0, None elif typestr == ".stext": return DataType.STRING_S, 0, None elif typestr == ".pstext": return DataType.STRING_PS, 0, None elif typestr.startswith(".array(") and typestr.endswith(")"): return DataType.BYTEARRAY, self._size_from_arraydecl(typestr), None elif typestr.startswith(".wordarray(") and typestr.endswith(")"): return DataType.WORDARRAY, self._size_from_arraydecl(typestr), None elif typestr.startswith(".matrix(") and typestr.endswith(")"): dimensions = self._size_from_matrixdecl(typestr) return DataType.MATRIX, dimensions[0] * dimensions[1], dimensions raise self.PError("invalid data type: " + typestr) def parse_header(self) -> None: self.result.with_sys = False self.result.format = ProgramFormat.RAW output_specified = False zp_specified = False preserve_specified = False while True: self._parse_comments() line = self.next_line() if line.startswith('%'): directive = line.split(maxsplit=1)[0][1:] if directive == "output": if output_specified: raise self.PError("can only specify output options once") output_specified = True _, _, optionstr = line.partition(" ") options = set(optionstr.replace(' ', '').split(',')) self.result.with_sys = False self.result.format = ProgramFormat.RAW if "raw" in options: options.remove("raw") if "prg" in options: options.remove("prg") self.result.format = ProgramFormat.PRG if "basic" in options: options.remove("basic") if self.result.format == ProgramFormat.PRG: self.result.with_sys = True else: raise self.PError("can only use basic output option with prg, not raw") if options: raise self.PError("invalid output option(s): " + str(options)) continue elif directive == "zp": if zp_specified: raise self.PError("can only specify ZP options once") zp_specified = True _, _, optionstr = line.partition(" ") options = set(optionstr.replace(' ', '').split(',')) self.result.clobberzp = False self.result.restorezp = False if "clobber" in options: options.remove("clobber") self.result.clobberzp = True if "restore" in options: options.remove("restore") if self.result.clobberzp: self.result.restorezp = True else: raise self.PError("can only use restore zp option if clobber zp is used as well") if options: raise self.PError("invalid zp option(s): " + str(options)) continue elif directive == "address": if self.result.start_address: raise self.PError("multiple occurrences of 'address'") _, _, arg = line.partition(" ") try: self.result.start_address = parse_expr_as_int(arg, None, None, self.sourceref) except ParseError: raise self.PError("invalid address") if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") continue elif directive == "saveregisters": if preserve_specified: raise self.PError("can only specify saveregisters option once") preserve_specified = True _, _, optionstr = line.partition(" ") self.result.preserve_registers = optionstr in ("", "true", "yes") continue elif directive == "import": break # the first import directive actually is not part of the header anymore else: raise self.PError("invalid directive") break # no more directives, header parsing finished! self.prev_line() if not self.result.start_address: # set the proper default start address if self.result.format == ProgramFormat.PRG: self.result.start_address = 0x0801 # normal C-64 basic program start address elif self.result.format == ProgramFormat.RAW: self.result.start_address = 0xc000 # default start for raw assembly if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801: raise self.PError("cannot use non-default 'address' when output format includes basic SYS program") def parse_import(self) -> None: line = self.next_line() line = line.lstrip() if not line.startswith(("%import ", "%import\t")): raise self.PError("expected import") try: _, filename = line.split(maxsplit=1) except ValueError: raise self.PError("invalid import statement") if filename[0] in "'\"" and filename[-1] in "'\"": filename = filename[1:-1] if not filename: raise self.PError("invalid filename") self._parse_import_file(filename) def _parse_import_file(self, filename: str) -> None: candidates = [filename+".ill", filename] filename_at_source_location = os.path.join(os.path.split(self.sourceref.file)[0], filename) if filename_at_source_location not in candidates: candidates.append(filename_at_source_location+".ill") candidates.append(filename_at_source_location) filename_at_libs_location = os.path.join(os.path.split(__file__)[0], "../lib", filename) if filename_at_libs_location not in candidates: candidates.append(filename_at_libs_location+".ill") candidates.append(filename_at_libs_location) for filename in candidates: if os.path.isfile(filename): if not self.check_import_okay(filename): return self.print_import_progress("importing", filename) parser = self.create_import_parser(filename, self.outputdir) result = parser.parse() self.print_import_progress("\ncontinuing", self.sourceref.file) if result: # merge the symbol table of the imported file into our own try: self.root_scope.merge_roots(parser.root_scope) self.result.merge(result) except SymbolError as x: raise self.PError(str(x)) return else: raise self.PError("Error while parsing imported file") raise self.PError("imported file not found") def print_import_progress(self, message: str, *args: str) -> None: print(message, *args) def create_import_parser(self, filename: str, outputdir: str) -> 'Parser': return Parser(filename, outputdir, self.existing_imports, True, ppsymbols=self.ppsymbols, sub_usage=self.result.subroutine_usage) def parse_block(self) -> Optional[Block]: # first line contains block header "~ [name] [addr]" followed by a '{' self._parse_comments() line = self.next_line() line = line.lstrip() if not line.startswith("~"): raise self.PError("expected '~' (block)") block_args = line[1:].split() arg = "" self.cur_block = Block("", self.sourceref, self.root_scope, self.result.preserve_registers) is_zp_block = False while block_args: arg = block_args.pop(0) if arg.isidentifier(): if arg.lower() == "zeropage" or arg in ("zp", "zP", "Zp"): raise self.PError("zero page block must be named 'ZP'") is_zp_block = arg == "ZP" if arg in set(b.name for b in self.result.blocks): orig = [b for b in self.result.blocks if b.name == arg][0] if not is_zp_block: raise self.PError("duplicate block name '{:s}', original definition at {}".format(arg, orig.sourceref)) self.cur_block = orig # zero page block occurrences are merged else: self.cur_block = Block(arg, self.sourceref, self.root_scope, self.result.preserve_registers) try: self.root_scope.define_scope(self.cur_block.symbols, self.cur_block.sourceref) except SymbolError as x: raise self.PError(str(x)) elif arg == "{": break elif arg.endswith("{"): # when there is no whitespace before the { block_args.insert(0, "{") block_args.insert(0, arg[:-1]) continue else: try: block_address = parse_expr_as_int(arg, self.cur_block.symbols, self.ppsymbols, self.sourceref) except ParseError: raise self.PError("Invalid block address") if block_address == 0 or (block_address < 0x0200 and not is_zp_block): raise self.PError("block address must be >= $0200 (or omitted)") if is_zp_block: if block_address not in (0, 0x04): raise self.PError("zero page block address must be $04 (or omittted)") block_address = 0x04 self.cur_block.address = block_address if arg != "{": line = self.peek_next_line() if line != "{": raise self.PError("expected '{' after block") else: self.next_line() if self.print_block_parsing: if self.cur_block.address: print(" parsing block '{:s}' at ${:04x}".format(self.cur_block.name, self.cur_block.address)) else: print(" parsing block '{:s}'".format(self.cur_block.name)) if self.cur_block.ignore: # just skip the lines until we hit a '}' that closes the block nesting_level = 1 while True: line = self.next_line().strip() if line.endswith("{"): nesting_level += 1 elif line == "}": nesting_level -= 1 if nesting_level == 0: self.print_warning("ignoring block without name and address", self.cur_block.sourceref) return None else: raise self.PError("invalid statement in block") while True: try: go_on, resultblock = self._parse_block_statement(is_zp_block) if not go_on: return resultblock except ParseError as x: self.handle_parse_error(x) def _parse_block_statement(self, is_zp_block: bool) -> Tuple[bool, Optional[Block]]: # parse the statements inside a block self._parse_comments() line = self.next_line() unstripped_line = line line = line.strip() if line.startswith('%'): directive, _, optionstr = line.partition(" ") directive = directive[1:] self.cur_block.preserve_registers = optionstr in ("", "true", "yes") if directive in ("asminclude", "asmbinary"): if is_zp_block: raise self.PError("ZP block cannot contain assembler directives") self.cur_block.statements.append(self.parse_asminclude(line)) elif directive == "asm": if is_zp_block: raise self.PError("ZP block cannot contain code statements") self.prev_line() self.cur_block.statements.append(self.parse_asm()) elif directive == "breakpoint": self.cur_block.statements.append(BreakpointStmt(self.sourceref)) self.print_warning("breakpoint defined") elif directive == "saveregisters": self.result.preserve_registers = optionstr in ("", "true", "yes") else: raise self.PError("invalid directive") elif line == "}": if is_zp_block and any(b.name == "ZP" for b in self.result.blocks): return False, None # we already have the ZP block if self.cur_block.ignore: self.print_warning("ignoring block without name and address", self.cur_block.sourceref) return False, None return False, self.cur_block elif line.startswith(("var ", "var\t")): self.parse_var_def(line) elif line.startswith(("const ", "const\t")): self.parse_const_def(line) elif line.startswith(("memory ", "memory\t")): self.parse_memory_def(line, is_zp_block) elif line.startswith(("sub ", "sub\t")): if is_zp_block: raise self.PError("ZP block cannot contain subroutines") self.parse_subroutine_def(line) elif unstripped_line.startswith((" ", "\t")): if line.endswith("{"): raise self.PError("invalid statement") if is_zp_block: raise self.PError("ZP block cannot contain code statements") self.cur_block.statements.append(self.parse_statement(line)) elif line: match = re.fullmatch(r"(?P