""" Programming Language for 6502/6510 microprocessors Here are the symbol (name) operations such as lookups, datatype definitions. Written by Irmen de Jong (irmen@razorvine.net) License: GNU GPL 3.0, see LICENSE """ import inspect import math import enum import builtins from functools import total_ordering from typing import Optional, Set, Union, Tuple, Dict, Iterable, Sequence, Any, List PrimitiveType = Union[int, float, str] REGISTER_SYMBOLS = {"A", "X", "Y", "AX", "AY", "XY", "SC", "SI"} REGISTER_SYMBOLS_RETURNVALUES = REGISTER_SYMBOLS | {"SZ"} REGISTER_BYTES = {"A", "X", "Y"} REGISTER_SBITS = {"SC", "SI", "SZ"} REGISTER_WORDS = {"AX", "AY", "XY"} # 5-byte cbm MFLPT format limitations: FLOAT_MAX_POSITIVE = 1.7014118345e+38 FLOAT_MAX_NEGATIVE = -1.7014118345e+38 RESERVED_NAMES = {'true', 'false', 'var', 'memory', 'const', 'asm', 'byte', 'word', 'float'} RESERVED_NAMES |= REGISTER_SYMBOLS MATH_SYMBOLS = {name for name in dir(math) if name[0].islower()} BUILTIN_SYMBOLS = {name for name in dir(builtins) if name[0].islower()} @total_ordering class DataType(enum.Enum): """The possible data types of values""" BYTE = 1 WORD = 2 FLOAT = 3 BYTEARRAY = 4 WORDARRAY = 5 MATRIX = 6 STRING = 7 STRING_P = 8 STRING_S = 9 STRING_PS = 10 def assignable_from_value(self, value: PrimitiveType) -> bool: if isinstance(value, (int, float)): if self == DataType.BYTE: return 0 <= value < 0x100 if self == DataType.WORD: return 0 <= value < 0x10000 if self == DataType.FLOAT: return type(value) in (float, int) return False def __lt__(self, other): if self.__class__ == other.__class__: return self.value < other.value return NotImplemented STRING_DATATYPES = {DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS} class SymbolError(Exception): pass _identifier_seq_nr = 0 class SourceRef: __slots__ = ("file", "line", "column") def __init__(self, file: str, line: int, column: int=0) -> None: self.file = file self.line = line self.column = column def __str__(self) -> str: if self.column: return "{:s}:{:d}:{:d}".format(self.file, self.line, self.column) if self.line: return "{:s}:{:d}".format(self.file, self.line) return self.file def copy(self) -> 'SourceRef': return SourceRef(self.file, self.line, self.column) class SymbolDefinition: def __init__(self, blockname: str, name: str, sourceref: SourceRef, allocate: bool) -> None: self.blockname = blockname self.name = name self.sourceref = sourceref.copy() self.allocate = allocate # set to false if the variable is memory mapped (or a constant) instead of allocated global _identifier_seq_nr self.seq_nr = _identifier_seq_nr _identifier_seq_nr += 1 def __lt__(self, other: 'SymbolDefinition') -> bool: if not isinstance(other, SymbolDefinition): return NotImplemented return (self.blockname, self.name, self.seq_nr) < (other.blockname, other.name, self.seq_nr) def __str__(self): return "<{:s} {:s}.{:s}>".format(self.__class__.__name__, self.blockname, self.name) class LabelDef(SymbolDefinition): pass class VariableDef(SymbolDefinition): # if address is None, it's a dynamically allocated variable. # if address is not None, it's a memory mapped variable (=memory address referenced by a name). def __init__(self, blockname: str, name: str, sourceref: SourceRef, datatype: DataType, allocate: bool, *, value: PrimitiveType, length: int, address: Optional[int]=None, register: str=None, matrixsize: Tuple[int, int]=None) -> None: super().__init__(blockname, name, sourceref, allocate) self.type = datatype self.address = address self.length = length self.value = value self.register = register self.matrixsize = matrixsize @property def is_memmap(self): return self.address is not None def __repr__(self): return ""\ .format(self.blockname, self.name, self.type, str(self.address), str(self.length), str(self.value)) def __lt__(self, other: 'SymbolDefinition') -> bool: if not isinstance(other, VariableDef): return NotImplemented v1 = (self.blockname, self.name or "", self.address or 0, self.seq_nr) v2 = (other.blockname, other.name or "", other.address or 0, self.seq_nr) return v1 < v2 class ConstantDef(SymbolDefinition): def __init__(self, blockname: str, name: str, sourceref: SourceRef, datatype: DataType, *, value: PrimitiveType, length: int) -> None: super().__init__(blockname, name, sourceref, False) self.type = datatype self.length = length self.value = value def __repr__(self): return ""\ .format(self.blockname, self.name, self.type, str(self.length), str(self.value)) def __lt__(self, other: 'SymbolDefinition') -> bool: if not isinstance(other, ConstantDef): return NotImplemented v1 = (str(self.value) or "", self.blockname, self.name or "", self.seq_nr) v2 = (str(other.value) or "", other.blockname, other.name or "", self.seq_nr) return v1 < v2 class SubroutineDef(SymbolDefinition): def __init__(self, blockname: str, name: str, sourceref: SourceRef, parameters: Sequence[Tuple[str, str]], returnvalues: Sequence[str], address: Optional[int]=None, sub_block: Any=None) -> None: super().__init__(blockname, name, sourceref, False) self.address = address self.sub_block = sub_block # this is a ParseResult.Block self.parameters = parameters self.clobbered_registers = set() # type: Set[str] self.return_registers = [] # type: List[str] # ordered! for _, param in parameters: if param in REGISTER_BYTES | REGISTER_SBITS: self.clobbered_registers.add(param) elif param in REGISTER_WORDS: self.clobbered_registers.add(param[0]) self.clobbered_registers.add(param[1]) else: raise SymbolError("invalid parameter spec: " + param) for register in returnvalues: if register in REGISTER_SYMBOLS_RETURNVALUES: self.return_registers.append(register) if len(register) == 1: self.clobbered_registers.add(register) else: self.clobbered_registers.add(register[0]) self.clobbered_registers.add(register[1]) elif register[-1] == "?": for r in register[:-1]: if r not in REGISTER_SYMBOLS_RETURNVALUES: raise SymbolError("invalid return value spec: " + r) if len(r) == 1: self.clobbered_registers.add(r) else: self.clobbered_registers.add(r[0]) self.clobbered_registers.add(r[1]) else: raise SymbolError("invalid return value spec: " + register) class Zeropage: SCRATCH_B1 = 0x02 SCRATCH_B2 = 0x03 def __init__(self) -> None: self.unused_bytes = [] # type: List[int] self.unused_words = [] # type: List[int] def configure(self, clobber_zp: bool = False) -> None: if clobber_zp: self.unused_bytes = list(range(0x04, 0x80)) self.unused_words = list(range(0x80, 0x100, 2)) else: # these are valid for the C-64: # ($02 and $03 are reserved as scratch addresses for various routines) self.unused_bytes = [0x06, 0x0a, 0x2a, 0x52, 0x93] # 5 zp variables (8 bits each) self.unused_words = [0x04, 0xf7, 0xf9, 0xfb, 0xfd] # 5 zp variables (16 bits each) assert self.SCRATCH_B1 not in self.unused_bytes and self.SCRATCH_B1 not in self.unused_words assert self.SCRATCH_B2 not in self.unused_bytes and self.SCRATCH_B2 not in self.unused_words def get_unused_byte(self): return self.unused_bytes.pop() def get_unused_word(self): return self.unused_words.pop() @property def available_byte_vars(self) -> int: return len(self.unused_bytes) @property def available_word_vars(self) -> int: return len(self.unused_words) # the single, global Zeropage object zeropage = Zeropage() class SymbolTable: def __init__(self, name: str, parent: Optional['SymbolTable'], owning_block: Any) -> None: self.name = name self.symbols = {} # type: Dict[str, Union[SymbolDefinition, SymbolTable]] self.parent = parent self.owning_block = owning_block self.eval_dict = None def __iter__(self): yield from self.symbols.values() def __getitem__(self, symbolname: str) -> Union[SymbolDefinition, 'SymbolTable']: return self.symbols[symbolname] def __contains__(self, symbolname: str) -> bool: return symbolname in self.symbols def lookup(self, dottedname: str, include_builtin_names: bool=False) -> Tuple['SymbolTable', Union[SymbolDefinition, 'SymbolTable']]: # Tries to find the dottedname in the current symbol table (if it is not scoped), # or globally if it is scoped (=contains a '.'). If required, math and builtin symbols # such as 'sin' or 'max' are also resolved. # Does NOT utilize a symbol table from a preprocessing parse phase, only looks in the current. nameparts = dottedname.split('.') if not nameparts[0]: nameparts = nameparts[1:] if len(nameparts) == 1: try: return self, self.symbols[nameparts[0]] except LookupError: if include_builtin_names: if nameparts[0] in MATH_SYMBOLS: return self, getattr(math, nameparts[0]) elif nameparts[0] in BUILTIN_SYMBOLS: return self, getattr(builtins, nameparts[0]) raise SymbolError("undefined symbol '{:s}'".format(nameparts[0])) from None # restart from global namespace: scope = self while scope.parent: scope = scope.parent for namepart in nameparts[:-1]: try: scope = scope.symbols[namepart] # type: ignore assert scope.name == namepart except LookupError: raise SymbolError("undefined block '{:s}'".format(namepart)) from None if isinstance(scope, SymbolTable): return scope.lookup(nameparts[-1]) elif isinstance(scope, SubroutineDef): return scope.sub_block.symbols.lookup_with_ppsymbols(nameparts[-1]) else: raise SymbolError("invalid block name '{:s}' in dotted name".format(namepart)) def get_address(self, name: str) -> int: scope, symbol = self.lookup(name) if isinstance(symbol, ConstantDef): raise SymbolError("cannot take the address of a constant") if not symbol or not isinstance(symbol, VariableDef): raise SymbolError("no var or const defined by that name") if symbol.address is None: raise SymbolError("can only take address of memory mapped variables") return symbol.address def as_eval_dict(self, ppsymbols: 'SymbolTable') -> Dict[str, Any]: # return a dictionary suitable to be passed as locals or globals to eval() if self.eval_dict is None: d = EvalSymbolDict(self, ppsymbols) self.eval_dict = d # type: ignore return self.eval_dict def iter_variables(self) -> Iterable[VariableDef]: yield from sorted((v for v in self.symbols.values() if isinstance(v, VariableDef))) def iter_constants(self) -> Iterable[ConstantDef]: yield from sorted((v for v in self.symbols.values() if isinstance(v, ConstantDef))) def iter_subroutines(self, userdefined_only: bool=False) -> Iterable[SubroutineDef]: if userdefined_only: yield from sorted((sub for sub in self.symbols.values() if isinstance(sub, SubroutineDef) and sub.address is None and sub.sub_block is not None)) else: yield from sorted((sub for sub in self.symbols.values() if isinstance(sub, SubroutineDef))) def iter_labels(self) -> Iterable[LabelDef]: yield from sorted((v for v in self.symbols.values() if isinstance(v, LabelDef))) def check_identifier_valid(self, name: str, sourceref: SourceRef) -> None: if not name.isidentifier(): raise SymbolError("invalid identifier") identifier = self.symbols.get(name, None) if identifier: if isinstance(identifier, SymbolDefinition): raise SymbolError("identifier was already defined at " + str(identifier.sourceref)) elif isinstance(identifier, SymbolTable): raise SymbolError("identifier already defined as block at " + str(identifier.owning_block.sourceref)) raise SymbolError("identifier already defined as " + str(type(identifier))) if name in MATH_SYMBOLS: print("warning: {}: identifier shadows a name from the math module".format(sourceref)) elif name in BUILTIN_SYMBOLS: print("warning: {}: identifier shadows a builtin name".format(sourceref)) def define_variable(self, name: str, sourceref: SourceRef, datatype: DataType, *, address: int=None, length: int=0, value: PrimitiveType=0, matrixsize: Tuple[int, int]=None, register: str=None) -> None: # this defines a new variable and also checks if the prefill value is allowed for the variable type. assert value is not None self.check_identifier_valid(name, sourceref) range_error = check_value_in_range(datatype, register, length, value) if range_error: raise ValueError(range_error) allocate = address is None if datatype == DataType.BYTE: if allocate and self.name == "ZP": try: address = zeropage.get_unused_byte() except LookupError: raise SymbolError("too many global 8-bit variables in ZP") self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.BYTE, allocate, value=value, length=1, address=address) elif datatype == DataType.WORD: if allocate and self.name == "ZP": try: address = zeropage.get_unused_word() except LookupError: raise SymbolError("too many global 16-bit variables in ZP") self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.WORD, allocate, value=value, length=1, address=address) elif datatype == DataType.FLOAT: if allocate and self.name == "ZP": raise SymbolError("floats cannot be stored in the ZP") self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.FLOAT, allocate, value=value, length=1, address=address) elif datatype == DataType.BYTEARRAY: self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.BYTEARRAY, allocate, value=value, length=length, address=address) elif datatype == DataType.WORDARRAY: self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.WORDARRAY, allocate, value=value, length=length, address=address) elif datatype in (DataType.STRING, DataType.STRING_P, DataType.STRING_S, DataType.STRING_PS): self.symbols[name] = VariableDef(self.name, name, sourceref, datatype, True, value=value, length=len(value)) # type: ignore elif datatype == DataType.MATRIX: assert isinstance(matrixsize, tuple) length = matrixsize[0] * matrixsize[1] self.symbols[name] = VariableDef(self.name, name, sourceref, DataType.MATRIX, allocate, value=value, length=length, address=address, matrixsize=matrixsize) else: raise ValueError("unknown type " + str(datatype)) self.eval_dict = None def define_sub(self, name: str, sourceref: SourceRef, parameters: Sequence[Tuple[str, str]], returnvalues: Sequence[str], address: Optional[int], sub_block: Any) -> None: self.check_identifier_valid(name, sourceref) self.symbols[name] = SubroutineDef(self.name, name, sourceref, parameters, returnvalues, address, sub_block) def discard_sub(self, name: str) -> None: sub = self.symbols[name] if isinstance(sub, SubroutineDef): del self.symbols[name] else: raise TypeError("not a subroutine") def define_label(self, name: str, sourceref: SourceRef) -> None: self.check_identifier_valid(name, sourceref) self.symbols[name] = LabelDef(self.name, name, sourceref, False) def define_scope(self, scope: 'SymbolTable', sourceref: SourceRef) -> None: self.check_identifier_valid(scope.name, sourceref) self.symbols[scope.name] = scope def define_constant(self, name: str, sourceref: SourceRef, datatype: DataType, *, length: int=0, value: PrimitiveType=0) -> None: # this defines a new constant and also checks if the value is allowed for the data type. assert value is not None self.check_identifier_valid(name, sourceref) range_error = check_value_in_range(datatype, "", length, value) if range_error: raise ValueError(range_error) if datatype in (DataType.BYTE, DataType.WORD, DataType.FLOAT): self.symbols[name] = ConstantDef(self.name, name, sourceref, datatype, value=value, length=length or 1) elif datatype in STRING_DATATYPES: strlen = len(value) # type: ignore self.symbols[name] = ConstantDef(self.name, name, sourceref, datatype, value=value, length=strlen) else: raise ValueError("invalid data type for constant: " + str(datatype)) self.eval_dict = None def merge_roots(self, other_root: 'SymbolTable') -> None: assert self.parent is None and other_root.parent is None for name, thing in other_root.symbols.items(): if isinstance(thing, SymbolTable): try: self.define_scope(thing, thing.owning_block.sourceref) except SymbolError as x: raise SymbolError("problematic symbol '{:s}' from {}; {:s}" .format(thing.name, thing.owning_block.sourceref, str(x))) from None def print_table(self) -> None: def print_symbols(symbols: 'SymbolTable', level: int) -> None: indent = '\t' * level print("\n" + indent + "BLOCK:", symbols.name) for name, s in sorted(symbols.symbols.items(), key=lambda x: str(getattr(x[1], "sourceref", ""))): if isinstance(s, SymbolTable): print_symbols(s, level + 1) elif isinstance(s, SubroutineDef): print(indent * 2 + "SUB: " + s.name, s.sourceref, sep="\t") elif isinstance(s, LabelDef): print(indent * 2 + "LABEL: " + s.name, s.sourceref, sep="\t") elif isinstance(s, VariableDef): print(indent * 2 + "VAR: " + s.name, s.sourceref, s.type, sep="\t") elif isinstance(s, ConstantDef): print(indent * 2 + "CONST: " + s.name, s.sourceref, s.type, sep="\t") else: raise TypeError("invalid symbol def type", s) print("\nSymbols defined in the symbol table:") print("------------------------------------") print_symbols(self, 0) print() class EvalSymbolDict(dict): def __init__(self, symboltable: SymbolTable, ppsymbols: SymbolTable, constant: bool=True) -> None: super().__init__() self._symboltable = symboltable self._ppsymbols = ppsymbols self._is_constant = constant def __getattr__(self, name): return self.__getitem__(name) def __getitem__(self, name): if name[0] != '_' and name in builtins.__dict__: return builtins.__dict__[name] try: scope, symbol = self._symboltable.lookup(name) except (LookupError, SymbolError): # attempt lookup from global scope global_scope = self._symboltable while global_scope.parent: global_scope = global_scope.parent try: scope, symbol = global_scope.lookup(name, True) except (LookupError, SymbolError): # try the ppsymbols if self._ppsymbols: return self._ppsymbols.as_eval_dict(None)[name] raise SymbolError("undefined symbol '{:s}'".format(name)) from None if self._is_constant: if isinstance(symbol, ConstantDef): return symbol.value elif isinstance(symbol, VariableDef): raise SymbolError("can't reference a variable inside a (constant) expression") elif inspect.isbuiltin(symbol): return symbol elif isinstance(symbol, SymbolTable): return symbol.as_eval_dict(self._ppsymbols) elif isinstance(symbol, (LabelDef, SubroutineDef)): raise SymbolError("can't reference a label or subroutine inside a (constant) expression") else: raise SymbolError("invalid symbol type referenced " + repr(symbol)) else: raise SymbolError("no support for non-constant expression evaluation yet") def check_value_in_range(datatype: DataType, register: str, length: int, value: PrimitiveType) -> Optional[str]: if register: if register in REGISTER_BYTES: if value < 0 or value > 0xff: # type: ignore return "value out of range, must be (unsigned) byte for a single register" elif register in REGISTER_SBITS: if value not in (0, 1): return "value out of range, must be 0 or 1 for a status bit register" elif register in REGISTER_WORDS: if value is None and datatype in (DataType.BYTE, DataType.WORD): return None if value < 0 or value > 0xffff: # type: ignore return "value out of range, must be (unsigned) word for 2 combined registers" else: return "strange register" elif datatype in (DataType.BYTE, DataType.BYTEARRAY, DataType.MATRIX): if value is None and datatype == DataType.BYTE: return None if value < 0 or value > 0xff: # type: ignore return "value out of range, must be (unsigned) byte" elif datatype in (DataType.WORD, DataType.WORDARRAY): if value is None and datatype in (DataType.BYTE, DataType.WORD): return None if value < 0 or value > 0xffff: # type: ignore return "value out of range, must be (unsigned) word" elif datatype in STRING_DATATYPES: if type(value) is not str: return "value must be a string" elif datatype == DataType.FLOAT: if type(value) not in (int, float): return "value must be a number" else: raise SymbolError("missing value check for type", datatype, register, length, value) return None # all ok ! def char_to_bytevalue(character: str, petscii: bool=True) -> int: assert len(character) == 1 if petscii: return ord(character.translate(ascii_to_petscii_trans)) else: raise NotImplementedError("screencode conversion not yet implemented for chars") # ASCII/UNICODE-to-PETSCII translation table # Unicode symbols supported that map to a PETSCII character: £ ↑ ← ♠ ♥ ♦ ♣ π ● ○ and various others ascii_to_petscii_trans = str.maketrans({ '\f': 147, # form feed becomes ClearScreen "{clear}" '\n': 13, # line feed becomes a RETURN "{cr}" (not a line feed) '\r': 17, # CR becomes CursorDown "{down}" 'a': 65, 'b': 66, 'c': 67, 'd': 68, 'e': 69, 'f': 70, 'g': 71, 'h': 72, 'i': 73, 'j': 74, 'k': 75, 'l': 76, 'm': 77, 'n': 78, 'o': 79, 'p': 80, 'q': 81, 'r': 82, 's': 83, 't': 84, 'u': 85, 'v': 86, 'w': 87, 'x': 88, 'y': 89, 'z': 90, 'A': 97, 'B': 98, 'C': 99, 'D': 100, 'E': 101, 'F': 102, 'G': 103, 'H': 104, 'I': 105, 'J': 106, 'K': 107, 'L': 108, 'M': 109, 'N': 110, 'O': 111, 'P': 112, 'Q': 113, 'R': 114, 'S': 115, 'T': 116, 'U': 117, 'V': 118, 'W': 119, 'X': 120, 'Y': 121, 'Z': 122, '{': 179, # left squiggle '}': 235, # right squiggle '£': 92, # pound currency sign '^': 94, # up arrow '~': 126, # pi math symbol 'π': 126, # pi symbol '`': 39, # single quote '✓': 250, # check mark '|': 221, # vertical bar '│': 221, # vertical bar '─': 96, # horizontal bar '┼': 123, # vertical and horizontal bar '↑': 94, # up arrow '←': 95, # left arrow '▔': 163, # upper bar '_': 164, # lower bar (underscore) '▁': 164, # lower bar '▎': 165, # left bar '♠': 97, # spades '●': 113, # circle '♥': 115, # hearts '○': 119, # open circle '♣': 120, # clubs '♦': 122, # diamonds '├': 171, # vertical and right '┤': 179, # vertical and left '┴': 177, # horiz and up '┬': 178, # horiz and down '└': 173, # up right '┐': 174, # down left '┌': 175, # down right '┘': 189, # up left '▗': 172, # block lr '▖': 187, # block ll '▝': 188, # block ur '▘': 190, # block ul '▚': 191, # block ul and lr '▌': 161, # left half '▄': 162, # lower half '▒': 230, # raster })