From 29060f3373cd63317ab95cebe865fca26f3f8bc2 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Wed, 10 Jan 2018 00:44:11 +0100 Subject: [PATCH] expression --- README.md | 4 +- il65/compile.py | 111 ++++++++------- il65/datatypes.py | 14 +- il65/generateasm.py | 14 +- il65/main.py | 2 +- il65/optimize.py | 91 ++++++------ il65/plylex.py | 8 ++ il65/plyparse.py | 320 +++++++++++++++++++++++++++++++++++++++---- reference.md | 4 +- tests/test_parser.py | 34 ++++- todo.ill | 16 ++- 11 files changed, 466 insertions(+), 152 deletions(-) diff --git a/README.md b/README.md index 4e16c0f1c..f88f61380 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors ===================================================================================== -*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0* +*Written by Irmen de Jong (irmen@razorvine.net)* *Software license: GNU GPL 3.0, see file LICENSE* @@ -17,6 +17,7 @@ which aims to provide many conveniences over raw assembly code (even when using - subroutines have enforced input- and output parameter definitions - various data types other than just bytes (16-bit words, floats, strings, 16-bit register pairs) - automatic variable allocations, automatic string variables and string sharing +- constant folding in expressions (compile-time evaluation) - automatic type conversions - floating point operations - optional automatic preserving and restoring CPU registers state, when calling routines that otherwise would clobber these @@ -24,6 +25,7 @@ which aims to provide many conveniences over raw assembly code (even when using - breakpoints, that let the Vice emulator drop into the monitor if execution hits them - source code labels automatically loaded in Vice emulator so it can show them in disassembly - conditional gotos +- some code optimizations (such as not repeatedly loading the same value in a register) - @todo: loops - @todo: memory block operations diff --git a/il65/compile.py b/il65/compile.py index 7790993a9..996ea3b35 100644 --- a/il65/compile.py +++ b/il65/compile.py @@ -11,8 +11,9 @@ import sys import linecache from typing import Optional, Tuple, Set, Dict, Any, no_type_check import attr -from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, \ - SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions +from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, LiteralValue, \ + SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions,\ + SymbolName, process_constant_expression, process_dynamic_expression from .plylex import SourceRef, print_bold from .optimize import optimize @@ -72,11 +73,8 @@ class PlyParser: def process_all_expressions(self, module: Module) -> None: # process/simplify all expressions (constant folding etc) for block, parent in module.all_scopes(): - if block.scope: - for node in block.scope.nodes: - if node is None: - print(block, block.scope, block.scope.nodes) - node.process_expressions() + for node in block.nodes: + node.process_expressions(block.scope) @no_type_check def create_multiassigns(self, module: Module) -> None: @@ -90,24 +88,23 @@ class PlyParser: return assign for block, parent in module.all_scopes(): - if block.scope: - for node in block.scope.nodes: - if isinstance(node, Assignment): - if isinstance(node.right, Assignment): - multi = reduce_right(node) - assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment) - node.simplify_targetregisters() + for node in block.nodes: + if isinstance(node, Assignment): + if isinstance(node.right, Assignment): + multi = reduce_right(node) + assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment) + node.simplify_targetregisters() def apply_directive_options(self, module: Module) -> None: def set_save_registers(scope: Scope, save_dir: Directive) -> None: if not scope: return if len(save_dir.args) > 1: - raise ParseError("need zero or one directive argument", save_dir.sourceref) + raise ParseError("expected zero or one directive argument", save_dir.sourceref) if save_dir.args: - if save_dir.args[0] in ("yes", "true"): + if save_dir.args[0] in ("yes", "true", True): scope.save_registers = True - elif save_dir.args[0] in ("no", "false"): + elif save_dir.args[0] in ("no", "false", False): scope.save_registers = False else: raise ParseError("invalid directive args", save_dir.sourceref) @@ -120,7 +117,7 @@ class PlyParser: for directive in block.scope.filter_nodes(Directive): if directive.name == "output": if len(directive.args) != 1 or not isinstance(directive.args[0], str): - raise ParseError("need one str directive argument", directive.sourceref) + raise ParseError("expected one str directive argument", directive.sourceref) if directive.args[0] == "raw": block.format = ProgramFormat.RAW block.address = 0xc000 @@ -134,7 +131,7 @@ class PlyParser: raise ParseError("invalid directive args", directive.sourceref) elif directive.name == "address": if len(directive.args) != 1 or not isinstance(directive.args[0], int): - raise ParseError("need one integer directive argument", directive.sourceref) + raise ParseError("expected one integer directive argument", directive.sourceref) if block.format == ProgramFormat.BASIC: raise ParseError("basic cannot have a custom load address", directive.sourceref) block.address = directive.args[0] @@ -178,21 +175,20 @@ class PlyParser: def determine_subroutine_usage(self, module: Module) -> None: module.subroutine_usage.clear() for block, parent in module.all_scopes(): - if block.scope: - for node in block.scope.nodes: - if isinstance(node, InlineAssembly): - self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, block.scope) - elif isinstance(node, SubCall): - self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, block.scope) - elif isinstance(node, Goto): - self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, block.scope) - elif isinstance(node, Return): - self._parse_return_for_subroutine_usages(module.subroutine_usage, node, block.scope) - elif isinstance(node, Assignment): - self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, block.scope) + for node in block.nodes: + if isinstance(node, InlineAssembly): + self._get_subroutine_usages_from_asm(module.subroutine_usage, node, block.scope) + elif isinstance(node, SubCall): + self._get_subroutine_usages_from_subcall(module.subroutine_usage, node, block.scope) + elif isinstance(node, Goto): + self._get_subroutine_usages_from_goto(module.subroutine_usage, node, block.scope) + elif isinstance(node, Return): + self._get_subroutine_usages_from_return(module.subroutine_usage, node, block.scope) + elif isinstance(node, Assignment): + self._get_subroutine_usages_from_assignment(module.subroutine_usage, node, block.scope) - def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], - subcall: SubCall, parent_scope: Scope) -> None: + def _get_subroutine_usages_from_subcall(self, usages: Dict[Tuple[str, str], Set[str]], + subcall: SubCall, parent_scope: Scope) -> None: # node.target (relevant if its a symbolname -- a str), node.arguments (list of CallArgument) # CallArgument.value = expression. if isinstance(subcall.target.target, str): @@ -203,22 +199,31 @@ class PlyParser: name = subcall.target.target usages[(scopename, name)].add(str(subcall.sourceref)) for arg in subcall.arguments: - self._parse_expression_for_subroutine_usages(usages, arg.value, parent_scope) + self._get_subroutine_usages_from_expression(usages, arg.value, parent_scope) - def _parse_expression_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], - expr: Any, parent_scope: Scope) -> None: + def _get_subroutine_usages_from_expression(self, usages: Dict[Tuple[str, str], Set[str]], + expr: Any, parent_scope: Scope) -> None: if expr is None or isinstance(expr, (int, str, float, bool, Register)): return elif isinstance(expr, SubCall): - self._parse_subcall_for_subroutine_usages(usages, expr, parent_scope) + self._get_subroutine_usages_from_subcall(usages, expr, parent_scope) elif isinstance(expr, Expression): - self._parse_expression_for_subroutine_usages(usages, expr.left, parent_scope) - self._parse_expression_for_subroutine_usages(usages, expr.right, parent_scope) + self._get_subroutine_usages_from_expression(usages, expr.left, parent_scope) + self._get_subroutine_usages_from_expression(usages, expr.right, parent_scope) + elif isinstance(expr, LiteralValue): + return + elif isinstance(expr, SymbolName): + try: + symbol = parent_scope[expr.name] + if isinstance(symbol, Subroutine): + usages[(parent_scope.name, expr.name)].add(str(expr.sourceref)) + except LookupError: + pass else: - print("@todo parse expression for subroutine usage:", expr) # @todo + raise TypeError("unknown expr type to scan for sub usages", expr, expr.sourceref) - def _parse_goto_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], - goto: Goto, parent_scope: Scope) -> None: + def _get_subroutine_usages_from_goto(self, usages: Dict[Tuple[str, str], Set[str]], + goto: Goto, parent_scope: Scope) -> None: # node.target (relevant if its a symbolname -- a str), node.condition (expression) if isinstance(goto.target.target, str): try: @@ -227,24 +232,24 @@ class PlyParser: return if isinstance(symbol, Subroutine): usages[(parent_scope.name, symbol.name)].add(str(goto.sourceref)) - self._parse_expression_for_subroutine_usages(usages, goto.condition, parent_scope) + self._get_subroutine_usages_from_expression(usages, goto.condition, parent_scope) - def _parse_return_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], - returnnode: Return, parent_scope: Scope) -> None: + def _get_subroutine_usages_from_return(self, usages: Dict[Tuple[str, str], Set[str]], + returnnode: Return, parent_scope: Scope) -> None: # node.value_A (expression), value_X (expression), value_Y (expression) - self._parse_expression_for_subroutine_usages(usages, returnnode.value_A, parent_scope) - self._parse_expression_for_subroutine_usages(usages, returnnode.value_X, parent_scope) - self._parse_expression_for_subroutine_usages(usages, returnnode.value_Y, parent_scope) + self._get_subroutine_usages_from_expression(usages, returnnode.value_A, parent_scope) + self._get_subroutine_usages_from_expression(usages, returnnode.value_X, parent_scope) + self._get_subroutine_usages_from_expression(usages, returnnode.value_Y, parent_scope) - def _parse_assignment_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]], - assignment: Assignment, parent_scope: Scope) -> None: + def _get_subroutine_usages_from_assignment(self, usages: Dict[Tuple[str, str], Set[str]], + assignment: Assignment, parent_scope: Scope) -> None: # node.right (expression, or another Assignment) if isinstance(assignment.right, Assignment): - self._parse_assignment_for_subroutine_usages(usages, assignment.right, parent_scope) + self._get_subroutine_usages_from_assignment(usages, assignment.right, parent_scope) else: - self._parse_expression_for_subroutine_usages(usages, assignment.right, parent_scope) + self._get_subroutine_usages_from_expression(usages, assignment.right, parent_scope) - def _parse_asm_for_subroutine_usage(self, usages: Dict[Tuple[str, str], Set[str]], + def _get_subroutine_usages_from_asm(self, usages: Dict[Tuple[str, str], Set[str]], asmnode: InlineAssembly, parent_scope: Scope) -> None: # asm can refer to other symbols as well, track subroutine usage for line in asmnode.assembly.splitlines(): diff --git a/il65/datatypes.py b/il65/datatypes.py index d1f3ae28c..2c08b9d42 100644 --- a/il65/datatypes.py +++ b/il65/datatypes.py @@ -104,13 +104,13 @@ def coerce_value(datatype: DataType, value: PrimitiveType, sourceref: SourceRef= # if we're a BYTE type, and the value is a single character, convert it to the numeric value def verify_bounds(value: PrimitiveType) -> None: # if the value is out of bounds, raise an overflow exception - if datatype == DataType.BYTE and not (0 <= value <= 0xff): # type: ignore - raise OverflowError("value out of range for byte") - if datatype == DataType.WORD and not (0 <= value <= 0xffff): # type: ignore - raise OverflowError("value out of range for word") - if datatype == DataType.FLOAT and not (FLOAT_MAX_NEGATIVE <= value <= FLOAT_MAX_POSITIVE): # type: ignore - raise OverflowError("value out of range for float") - + if isinstance(value, (int, float)): + if datatype == DataType.BYTE and not (0 <= value <= 0xff): # type: ignore + raise OverflowError("value out of range for byte") + if datatype == DataType.WORD and not (0 <= value <= 0xffff): # type: ignore + raise OverflowError("value out of range for word") + if datatype == DataType.FLOAT and not (FLOAT_MAX_NEGATIVE <= value <= FLOAT_MAX_POSITIVE): # type: ignore + raise OverflowError("value out of range for float") if datatype in (DataType.BYTE, DataType.BYTEARRAY, DataType.MATRIX) and isinstance(value, str): if len(value) == 1: return True, char_to_bytevalue(value) diff --git a/il65/generateasm.py b/il65/generateasm.py index f60ce8afe..02bb74c5e 100644 --- a/il65/generateasm.py +++ b/il65/generateasm.py @@ -10,6 +10,7 @@ import subprocess import datetime import itertools from typing import Union, TextIO, List, Tuple, Iterator +from .plylex import print_bold from .plyparse import Module, ProgramFormat, Block, Directive, VarDef, Label, Subroutine, AstNode, ZpOptions from .datatypes import VarType, DataType, to_hex, mflpt5_to_float, to_mflpt5, STRING_DATATYPES @@ -47,7 +48,17 @@ class AssemblyGenerator: self.footer() def sanitycheck(self): - # duplicate block names? + start_found = False + for block, parent in self.module.all_scopes(): + for label in block.nodes: + if isinstance(label, Label) and label.name == "start" and block.name == "main": + start_found = True + break + if start_found: + break + if not start_found: + print_bold("ERROR: program entry point is missing ('start' label in 'main' block)\n") + raise SystemExit(1) all_blocknames = [b.name for b in self.module.scope.filter_nodes(Block)] unique_blocknames = set(all_blocknames) if len(all_blocknames) != len(unique_blocknames): @@ -329,6 +340,7 @@ class AssemblyGenerator: def generate_statement(self, stmt: AstNode) -> None: if isinstance(stmt, Label): self.p("\n{:s}\v\t\t; {:s}".format(stmt.name, stmt.lineref)) + self.p("\vrts") # @todo rest of the statement nodes diff --git a/il65/main.py b/il65/main.py index 4c24ada92..f64fcf964 100644 --- a/il65/main.py +++ b/il65/main.py @@ -40,7 +40,7 @@ def main() -> None: if args.nooptimize: print_bold("not optimizing the parse tree!") else: - print("\nOptimizing parse tree.") + print("\nOptimizing code.") optimize(parsed_module) print("\nGenerating assembly code.") cg = AssemblyGenerator(parsed_module) diff --git a/il65/optimize.py b/il65/optimize.py index aac3affee..b4ce9546e 100644 --- a/il65/optimize.py +++ b/il65/optimize.py @@ -28,66 +28,61 @@ class Optimizer: # and augmented assignments that have no effect (A+=0) # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false (or perhaps turn them into byte values first?) for block, parent in self.module.all_scopes(): - if block.scope: - for assignment in list(block.scope.nodes): - if isinstance(assignment, Assignment): - assignment.left = [lv for lv in assignment.left if lv != assignment.right] - if not assignment.left: - block.scope.remove_node(assignment) + for assignment in list(block.nodes): + if isinstance(assignment, Assignment): + assignment.left = [lv for lv in assignment.left if lv != assignment.right] + if not assignment.left: + block.scope.remove_node(assignment) + self.num_warnings += 1 + print_warning("{}: removed statement that has no effect".format(assignment.sourceref)) + if isinstance(assignment, AugAssignment): + if isinstance(assignment.right, (int, float)): + if assignment.right == 0 and assignment.operator in ("+=", "-=", "|=", "<<=", ">>=", "^="): self.num_warnings += 1 print_warning("{}: removed statement that has no effect".format(assignment.sourceref)) - if isinstance(assignment, AugAssignment): - if isinstance(assignment.right, (int, float)): - if assignment.right == 0 and assignment.operator in ("+=", "-=", "|=", "<<=", ">>=", "^="): - self.num_warnings += 1 - print_warning("{}: removed statement that has no effect".format(assignment.sourceref)) - block.scope.remove_node(assignment) - if assignment.right >= 8 and assignment.operator in ("<<=", ">>="): - self.num_warnings += 1 - print_warning("{}: shifting result is always zero".format(assignment.sourceref)) - new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref) - block.scope.replace_node(assignment, new_stmt) + block.scope.remove_node(assignment) + if assignment.right >= 8 and assignment.operator in ("<<=", ">>="): + print("{}: shifting result is always zero".format(assignment.sourceref)) + new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref) + block.scope.replace_node(assignment, new_stmt) def combine_assignments_into_multi(self): # fold multiple consecutive assignments with the same rvalue into one multi-assignment for block, parent in self.module.all_scopes(): - if block.scope: - rvalue = None - assignments = [] - for stmt in list(block.scope.nodes): - if isinstance(stmt, Assignment): - if assignments: - if stmt.right == rvalue: - assignments.append(stmt) - continue - elif len(assignments) > 1: - # replace the first assignment by a multi-assign with all the others - for assignment in assignments[1:]: - print("{}: joined with previous assignment".format(assignment.sourceref)) - assignments[0].left.extend(assignment.left) - block.scope.remove_node(assignment) - rvalue = None - assignments.clear() - else: - rvalue = stmt.right + rvalue = None + assignments = [] + for stmt in list(block.nodes): + if isinstance(stmt, Assignment): + if assignments: + if stmt.right == rvalue: assignments.append(stmt) + continue + elif len(assignments) > 1: + # replace the first assignment by a multi-assign with all the others + for assignment in assignments[1:]: + print("{}: joined with previous assignment".format(assignment.sourceref)) + assignments[0].left.extend(assignment.left) + block.scope.remove_node(assignment) + rvalue = None + assignments.clear() else: - rvalue = None - assignments.clear() + rvalue = stmt.right + assignments.append(stmt) + else: + rvalue = None + assignments.clear() def optimize_multiassigns(self): # optimize multi-assign statements (remove duplicate targets, optimize order) for block, parent in self.module.all_scopes(): - if block.scope: - for assignment in block.scope.nodes: - if isinstance(assignment, Assignment) and len(assignment.left) > 1: - # remove duplicates - lvalues = set(assignment.left) - if len(lvalues) != len(assignment.left): - self.num_warnings += 1 - print_warning("{}: removed duplicate assignment targets".format(assignment.sourceref)) - # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) - assignment.left = list(lvalues) + for assignment in block.nodes: + if isinstance(assignment, Assignment) and len(assignment.left) > 1: + # remove duplicates + lvalues = set(assignment.left) + if len(lvalues) != len(assignment.left): + print("{}: removed duplicate assignment targets".format(assignment.sourceref)) + # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any) + assignment.left = list(lvalues) def remove_unused_subroutines(self): # some symbols are used by the emitted assembly code from the code generator, diff --git a/il65/plylex.py b/il65/plylex.py index eb0e47ea8..0debb0a4e 100644 --- a/il65/plylex.py +++ b/il65/plylex.py @@ -61,6 +61,7 @@ tokens = ( "LOGICAND", "LOGICOR", "LOGICNOT", + "INTEGERDIVIDE", "POWER", "LABEL", "IF", @@ -73,6 +74,7 @@ literals = ['+', '-', '*', '/', '(', ')', '[', ']', '{', '}', '.', ',', '!', '?' # regex rules for simple tokens +t_INTEGERDIVIDE = r"//" t_BITAND = r"&" t_BITOR = r"\|" t_BITXOR = r"\^" @@ -219,6 +221,12 @@ def t_LABEL(t): return t +def t_BOOLEAN(t): + r"true|false" + t.value = t.value == "true" + return t + + def t_DOTTEDNAME(t): r"[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+" return t diff --git a/il65/plyparse.py b/il65/plyparse.py index 940b49e50..c3c8ad910 100644 --- a/il65/plyparse.py +++ b/il65/plyparse.py @@ -5,9 +5,12 @@ This is the parser of the IL65 code, that generates a parse tree. Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ +import math +import builtins +import inspect import enum from collections import defaultdict -from typing import Union, Generator, Tuple, List, Optional, Dict +from typing import Union, Generator, Tuple, List, Optional, Dict, Any, Iterable import attr from ply.yacc import yacc from .plylex import SourceRef, tokens, lexer, find_tok_column @@ -26,15 +29,24 @@ class ZpOptions(enum.Enum): CLOBBER_RESTORE = "clobber_restore" +math_functions = {name: func for name, func in vars(math).items() if inspect.isbuiltin(func)} +builtin_functions = {name: func for name, func in vars(builtins).items() if inspect.isbuiltin(func)} + + class ParseError(Exception): def __init__(self, message: str, sourceref: SourceRef) -> None: super().__init__(message) self.sourceref = sourceref + # @todo chain attribute, a list of other exceptions, so we can have more than 1 error at a time. def __str__(self): return "{} {:s}".format(self.sourceref, self.args[0]) +class ExpressionEvaluationError(ParseError): + pass + + start = "start" @@ -69,9 +81,9 @@ class AstNode: tostr(elt, level + 2) tostr(self, 0) - def process_expressions(self) -> None: - # process/simplify all expressions (constant folding etc) @todo - # @todo override in node types that have expression(s) + def process_expressions(self, scope: 'Scope') -> None: + # process/simplify all expressions (constant folding etc) + # this is implemented in node types that have expression(s) and that should act on this. pass @@ -115,6 +127,7 @@ class Scope(AstNode): node.scope.parent_scope = self def __getitem__(self, name: str) -> AstNode: + assert isinstance(name, str) if '.' in name: # look up the dotted name starting from the topmost scope scope = self @@ -166,13 +179,13 @@ class Scope(AstNode): self._populate_symboltable(newnode) -def validate_address(object: AstNode, attrib: attr.Attribute, value: Optional[int]): +def validate_address(obj: AstNode, attrib: attr.Attribute, value: Optional[int]): if value is None: return - if isinstance(object, Block) and object.name == "ZP": - raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), object.sourceref) + if isinstance(obj, Block) and obj.name == "ZP": + raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), obj.sourceref) if value < 0x0200 or value > 0xffff: - raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), object.sourceref) + raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), obj.sourceref) @attr.s(cmp=False, repr=False) @@ -185,6 +198,12 @@ class Block(AstNode): def __attrs_post_init__(self): self.scope.name = self.name + @property + def nodes(self) -> Iterable[AstNode]: + if self.scope: + return self.scope.nodes + return [] + @property def label(self) -> str: if self.name: @@ -205,6 +224,12 @@ class Module(AstNode): address = attr.ib(type=int, init=False, default=0xc000, validator=validate_address) # can be set via directive zp_options = attr.ib(type=ZpOptions, init=False, default=ZpOptions.NOCLOBBER) # can be set via directive + @property + def nodes(self) -> Iterable[AstNode]: + if self.scope: + return self.scope.nodes + return [] + def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]: # generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples. # it iterates of copies of the node collections, so it's okay to modify the scopes you iterate over. @@ -275,6 +300,9 @@ class Assignment(AstNode): new_targets.append(t) self.left = new_targets + def process_expressions(self, scope: Scope) -> None: + self.right = process_expression(self.right, scope, self.right.sourceref) + @attr.s(cmp=False, repr=False) class AugAssignment(AstNode): @@ -282,6 +310,9 @@ class AugAssignment(AstNode): operator = attr.ib(type=str) right = attr.ib() + def process_expressions(self, scope: Scope) -> None: + self.right = process_expression(self.right, scope, self.right.sourceref) + @attr.s(cmp=False, repr=False) class SubCall(AstNode): @@ -292,6 +323,11 @@ class SubCall(AstNode): def __attrs_post_init__(self): self.arguments = self.arguments or [] + def process_expressions(self, scope: Scope) -> None: + for callarg in self.arguments: + assert isinstance(callarg, CallArgument) + callarg.process_expressions(scope) + @attr.s(cmp=False, repr=False) class Return(AstNode): @@ -299,6 +335,14 @@ class Return(AstNode): value_X = attr.ib(default=None) value_Y = attr.ib(default=None) + def process_expressions(self, scope: Scope) -> None: + if self.value_A is not None: + self.value_A = process_expression(self.value_A, scope, self.value_A.sourceref) + if self.value_X is not None: + self.value_X = process_expression(self.value_X, scope, self.value_X.sourceref) + if self.value_Y is not None: + self.value_Y = process_expression(self.value_Y, scope, self.value_Y.sourceref) + @attr.s(cmp=False, repr=False) class TargetRegisters(AstNode): @@ -347,12 +391,9 @@ class VarDef(AstNode): self.value = 0 # note: value coercion is done later, when all expressions are evaluated - def process_expressions(self) -> None: - if isinstance(self.value, Expression): - # process/simplify all expressions (constant folding etc) # @todo - # verify that the expression yields a single constant value, replace value by that value # @todo - self.value = 123 # XXX - assert not isinstance(self.value, Expression) + def process_expressions(self, scope: Scope) -> None: + self.value = process_expression(self.value, scope, self.sourceref) + assert not isinstance(self.value, Expression), "processed expression for vardef should reduce to a constant value" if self.vartype in (VarType.CONST, VarType.VAR): try: _, self.value = coerce_value(self.datatype, self.value, self.sourceref) @@ -388,6 +429,12 @@ class Subroutine(AstNode): scope = attr.ib(type=Scope, default=None) address = attr.ib(type=int, default=None, validator=validate_address) + @property + def nodes(self) -> Iterable[AstNode]: + if self.scope: + return self.scope.nodes + return [] + def __attrs_post_init__(self): if self.scope and self.address is not None: raise ValueError("subroutine must have either a scope or an address, not both") @@ -401,6 +448,10 @@ class Goto(AstNode): if_stmt = attr.ib(default=None) condition = attr.ib(default=None) + def process_expressions(self, scope: Scope) -> None: + if self.condition is not None: + self.condition = process_expression(self.condition, scope, self.condition.sourceref) + @attr.s(cmp=False, repr=False) class Dereference(AstNode): @@ -420,6 +471,37 @@ class Dereference(AstNode): self.datatype = self.datatype.to_enum() +@attr.s(cmp=False, repr=False) +class LiteralValue(AstNode): + value = attr.ib() + + def __repr__(self) -> str: + return repr(self.value) + + +@attr.s(cmp=False, repr=False) +class AddressOf(AstNode): + name = attr.ib(type=str) + + +@attr.s(cmp=False, repr=False) +class IncrDecr(AstNode): + target = attr.ib() + operator = attr.ib(type=str, validator=attr.validators.in_(["++", "--"])) + howmuch = attr.ib(default=1) + + def __attrs_post_init__(self): + # make sure the amount is always >= 0 + if self.howmuch < 0: + self.howmuch = -self.howmuch + self.operator = "++" if self.operator == "--" else "--" + + +@attr.s(cmp=False, repr=False) +class SymbolName(AstNode): + name = attr.ib(type=str) + + @attr.s(cmp=False, slots=True, repr=False) class CallTarget(AstNode): target = attr.ib() @@ -431,11 +513,8 @@ class CallArgument(AstNode): value = attr.ib() name = attr.ib(type=str, default=None) - -@attr.s(cmp=False, repr=False) -class UnaryOp(AstNode): - operator = attr.ib(type=str) - operand = attr.ib() + def process_expressions(self, scope: Scope) -> None: + self.value = process_expression(self.value, scope, self.sourceref) @attr.s(cmp=False, slots=True, repr=False) @@ -443,10 +522,187 @@ class Expression(AstNode): left = attr.ib() operator = attr.ib(type=str) right = attr.ib() + unary = attr.ib(type=bool, default=False) processed_must_be_constant = attr.ib(type=bool, init=False, default=False) # does the expression have to be a constant value? - processed = attr.ib(type=bool, init=False, default=False) # has this expression been processed/simplified yet? - constant = attr.ib(type=bool, init=False, default=False) # is the processed expression a constant value? + def __attrs_post_init__(self): + assert self.operator not in ("++", "--"), "incr/decr should not be an expression" + + def process_expressions(self, scope: Scope) -> None: + raise RuntimeError("should be done via parent node's process_expressions") + + def evaluate_primitive_constants(self, scope: Scope) -> Union[int, float, str, bool]: + # make sure the lvalue and rvalue are primitives, and the operator is allowed + if not isinstance(self.left, (LiteralValue, int, float, str, bool)): + raise TypeError("left", self) + if not isinstance(self.right, (LiteralValue, int, float, str, bool)): + raise TypeError("right", self) + if self.operator not in {'+', '-', '*', '/', '//', '~', '<', '>', '<=', '>=', '==', '!='}: + raise ValueError("operator", self) + estr = "{} {} {}".format(repr(self.left), self.operator, repr(self.right)) + try: + return eval(estr, {}, {}) # safe because of checks above + except Exception as x: + raise ExpressionEvaluationError("expression error: " + str(x), self.sourceref) from None + + def print_tree(self) -> None: + def tree(expr: Any, level: int) -> str: + indent = " "*level + if not isinstance(expr, Expression): + return indent + str(expr) + "\n" + if expr.unary: + return indent + "{}{}".format(expr.operator, tree(expr.left, level+1)) + else: + return indent + "{}".format(tree(expr.left, level+1)) + \ + indent + str(expr.operator) + "\n" + \ + indent + "{}".format(tree(expr.right, level + 1)) + print(tree(self, 0)) + + +def process_expression(value: Any, scope: Scope, sourceref: SourceRef) -> Any: + # process/simplify all expressions (constant folding etc) + if isinstance(value, Expression): + must_be_constant = value.processed_must_be_constant + else: + must_be_constant = False + if must_be_constant: + return process_constant_expression(value, sourceref, scope) + else: + return process_dynamic_expression(value, sourceref, scope) + + +def process_constant_expression(expr: Any, sourceref: SourceRef, symbolscope: Scope) -> Union[int, float, str, bool]: + # the expression must result in a single (constant) value (int, float, whatever) + if expr is None or isinstance(expr, (int, float, str, bool)): + return expr + elif isinstance(expr, LiteralValue): + return expr.value + elif isinstance(expr, SymbolName): + try: + value = symbolscope[expr.name] + if isinstance(value, VarDef): + if value.vartype == VarType.MEMORY: + raise ExpressionEvaluationError("can't take a memory value, must be a constant", expr.sourceref) + value = value.value + if isinstance(value, Expression): + raise ExpressionEvaluationError("circular reference?", expr.sourceref) + elif isinstance(value, (int, float, str, bool)): + return value + else: + raise ExpressionEvaluationError("constant symbol required, not {}".format(value.__class__.__name__), expr.sourceref) + except LookupError as x: + raise ExpressionEvaluationError(str(x), expr.sourceref) from None + elif isinstance(expr, AddressOf): + assert isinstance(expr.name, SymbolName) + try: + value = symbolscope[expr.name.name] + if isinstance(value, VarDef): + if value.vartype == VarType.MEMORY: + return value.value + raise ParseError("can't take the address of this {}".format(value.__class__.__name__), expr.name.sourceref) + else: + raise ExpressionEvaluationError("constant address required, not {}".format(value.__class__.__name__), expr.name.sourceref) + except LookupError as x: + raise ParseError(str(x), expr.sourceref) from None + elif isinstance(expr, SubCall): + if isinstance(expr.target, CallTarget): + funcname = expr.target.target.name + if funcname in math_functions or funcname in builtin_functions: + if isinstance(expr.target.target, SymbolName): + func_args = [] + for a in (process_constant_expression(callarg.value, sourceref, symbolscope) for callarg in expr.arguments): + if isinstance(a, LiteralValue): + func_args.append(a.value) + else: + func_args.append(a) + func = math_functions.get(funcname, builtin_functions.get(funcname)) + try: + return func(*func_args) + except Exception as x: + raise ExpressionEvaluationError(str(x), expr.sourceref) + else: + raise ParseError("symbol name required, not {}".format(expr.target.__class__.__name__), expr.sourceref) + else: + raise ExpressionEvaluationError("can only use math- or builtin function", expr.sourceref) + else: + raise ParseError("function name required, not {}".format(expr.target.__class__.__name__), expr.sourceref) + elif not isinstance(expr, Expression): + raise ExpressionEvaluationError("constant value required, not {}".format(expr.__class__.__name__), expr.sourceref) + if expr.unary: + left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref + expr.left = process_constant_expression(expr.left, left_sourceref, symbolscope) + if isinstance(expr.left, (int, float)): + try: + if expr.operator == '-': + return -expr.left + elif expr.operator == '~': + return ~expr.left # type: ignore + elif expr.operator in ("++", "--"): + raise ValueError("incr/decr should not be an expression") + raise ValueError("invalid unary operator", expr.operator) + except TypeError as x: + raise ParseError(str(x), expr.sourceref) from None + raise ValueError("invalid operand type for unary operator", expr.left, expr.operator) + else: + left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref + expr.left = process_constant_expression(expr.left, left_sourceref, symbolscope) + right_sourceref = expr.right.sourceref if isinstance(expr.right, AstNode) else sourceref + expr.right = process_constant_expression(expr.right, right_sourceref, symbolscope) + if isinstance(expr.left, (LiteralValue, SymbolName, int, float, str, bool)): + if isinstance(expr.right, (LiteralValue, SymbolName, int, float, str, bool)): + return expr.evaluate_primitive_constants(symbolscope) + else: + raise ExpressionEvaluationError("constant value required on right, not {}" + .format(expr.right.__class__.__name__), right_sourceref) + else: + raise ExpressionEvaluationError("constant value required on left, not {}" + .format(expr.left.__class__.__name__), left_sourceref) + + +def process_dynamic_expression(expr: Any, sourceref: SourceRef, symbolscope: Scope) -> Any: + # constant-fold a dynamic expression + if expr is None or isinstance(expr, (int, float, str, bool)): + return expr + elif isinstance(expr, LiteralValue): + return expr.value + elif isinstance(expr, SymbolName): + try: + return process_constant_expression(expr, sourceref, symbolscope) + except ExpressionEvaluationError: + return expr + elif isinstance(expr, AddressOf): + try: + return process_constant_expression(expr, sourceref, symbolscope) + except ExpressionEvaluationError: + return expr + elif isinstance(expr, SubCall): + try: + return process_constant_expression(expr, sourceref, symbolscope) + except ExpressionEvaluationError: + return expr + elif isinstance(expr, Register): + return expr + elif not isinstance(expr, Expression): + raise ParseError("expression required, not {}".format(expr.__class__.__name__), expr.sourceref) + if expr.unary: + left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref + expr.left = process_dynamic_expression(expr.left, left_sourceref, symbolscope) + try: + return process_constant_expression(expr, sourceref, symbolscope) + except ExpressionEvaluationError: + return expr + else: + left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref + expr.left = process_dynamic_expression(expr.left, left_sourceref, symbolscope) + right_sourceref = expr.right.sourceref if isinstance(expr.right, AstNode) else sourceref + expr.right = process_dynamic_expression(expr.right, right_sourceref, symbolscope) + try: + return process_constant_expression(expr, sourceref, symbolscope) + except ExpressionEvaluationError: + return expr + + +# ----------------- PLY parser definition follows ---------------------- def p_start(p): """ @@ -653,7 +909,7 @@ def p_literal_value(p): | STRING | CHARACTER | BOOLEAN""" - p[0] = p[1] + p[0] = LiteralValue(value=p[1], sourceref=_token_sref(p, 1)) def p_subroutine(p): @@ -759,14 +1015,14 @@ def p_incrdecr(p): incrdecr : assignment_target INCR | assignment_target DECR """ - p[0] = UnaryOp(operator=p[2], operand=p[1], sourceref=_token_sref(p, 1)) + p[0] = IncrDecr(target=p[1], operator=p[2], sourceref=_token_sref(p, 1)) def p_call_subroutine(p): """ subroutine_call : calltarget preserveregs_opt '(' call_arguments_opt ')' """ - p[0] = SubCall(target=p[1], preserve_regs=p[2], arguments=p[4], sourceref=_token_sref(p, 1)) + p[0] = SubCall(target=p[1], preserve_regs=p[2], arguments=p[4], sourceref=_token_sref(p, 3)) def p_preserveregs_opt(p): @@ -894,7 +1150,7 @@ def p_symbolname(p): symbolname : NAME | DOTTEDNAME """ - p[0] = p[1] + p[0] = SymbolName(name=p[1], sourceref=_token_sref(p, 1)) def p_assignment(p): @@ -914,7 +1170,7 @@ def p_aug_assignment(p): precedence = ( ('left', '+', '-'), - ('left', '*', '/'), + ('left', '*', '/', 'INTEGERDIVIDE'), ('right', 'UNARY_MINUS', 'BITINVERT', "UNARY_ADDRESSOF"), ('left', "LT", "GT", "LE", "GE", "EQUALS", "NOTEQUALS"), ('nonassoc', "COMMENT"), @@ -927,6 +1183,7 @@ def p_expression(p): | expression '-' expression | expression '*' expression | expression '/' expression + | expression INTEGERDIVIDE expression | expression LT expression | expression GT expression | expression LE expression @@ -941,21 +1198,21 @@ def p_expression_uminus(p): """ expression : '-' expression %prec UNARY_MINUS """ - p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1)) + p[0] = Expression(left=p[2], operator=p[1], right=None, unary=True, sourceref=_token_sref(p, 1)) def p_expression_addressof(p): """ expression : BITAND symbolname %prec UNARY_ADDRESSOF """ - p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1)) + p[0] = AddressOf(name=p[2], sourceref=_token_sref(p, 1)) def p_unary_expression_bitinvert(p): """ expression : BITINVERT expression """ - p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1)) + p[0] = Expression(left=p[2], operator=p[1], right=None, unary=True, sourceref=_token_sref(p, 1)) def p_expression_group(p): @@ -1012,7 +1269,10 @@ def p_error(p): print('\n[ERROR DEBUG: parser state={:d} stack: {} . {} ]'.format(parser.state, stack_state_str, p)) if p: sref = SourceRef(p.lexer.source_filename, p.lineno, find_tok_column(p)) - p.lexer.error_function(sref, "syntax error before '{:.20s}'", str(p.value)) + if p.value in ("", "\n"): + p.lexer.error_function(sref, "syntax error before end of line") + else: + p.lexer.error_function(sref, "syntax error before '{:.20s}'", str(p.value).rstrip()) else: lexer.error_function(None, "syntax error at end of input", lexer.source_filename) diff --git a/reference.md b/reference.md index a7ee32150..a86cc7dbf 100644 --- a/reference.md +++ b/reference.md @@ -1,7 +1,7 @@ IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors ===================================================================================== -*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0* +*Written by Irmen de Jong (irmen@razorvine.net)* *Software license: GNU GPL 3.0, see file LICENSE* @@ -17,6 +17,7 @@ which aims to provide many conveniences over raw assembly code (even when using - subroutines have enforced input- and output parameter definitions - various data types other than just bytes (16-bit words, floats, strings, 16-bit register pairs) - automatic variable allocations, automatic string variables and string sharing +- constant folding in expressions (compile-time evaluation) - automatic type conversions - floating point operations - optional automatic preserving and restoring CPU registers state, when calling routines that otherwise would clobber these @@ -24,6 +25,7 @@ which aims to provide many conveniences over raw assembly code (even when using - breakpoints, that let the Vice emulator drop into the monitor if execution hits them - source code labels automatically loaded in Vice emulator so it can show them in disassembly - conditional gotos +- some code optimizations (such as not repeatedly loading the same value in a register) - @todo: loops - @todo: memory block operations diff --git a/tests/test_parser.py b/tests/test_parser.py index da479b8ff..d44f8f480 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -1,5 +1,5 @@ -from il65.plylex import lexer, tokens, find_tok_column, literals, reserved -from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return +from il65.plylex import lexer, tokens, find_tok_column, literals, reserved, SourceRef +from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return, Scope, VarDef, Expression, LiteralValue, Label def test_lexer_definitions(): @@ -26,6 +26,7 @@ test_source = """ %output prg, sys ; comment + var foo = 42+true var .matrix(20,30) m = 9.234556 ;comment2 @@ -47,6 +48,7 @@ def test_lexer(): assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', 'ENDL', 'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL', 'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', + 'VARTYPE', 'NAME', 'IS', 'INTEGER', '+', 'BOOLEAN', 'ENDL', 'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'ENDL', 'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', 'ENDL', 'ENDL', 'ENDL', '}', 'ENDL'] @@ -56,6 +58,10 @@ def test_lexer(): assert directive_token.lineno == 9 assert directive_token.lexpos == lexer.lexdata.index("%import") assert find_tok_column(directive_token) == 10 + bool_token = tokens[23] + assert bool_token.type == "BOOLEAN" + assert type(bool_token.value) is bool + assert bool_token.value == True def test_tokenfilter(): @@ -72,6 +78,7 @@ def test_tokenfilter(): assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL', 'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', + 'VARTYPE', 'NAME', 'IS', 'INTEGER', '+', 'BOOLEAN', 'ENDL', 'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', '}', 'ENDL'] @@ -93,10 +100,17 @@ def test_parser(): block = result.scope["block"] assert isinstance(block, Block) assert block.name == "block" + assert block.nodes is block.scope.nodes + bool_vdef = block.scope.nodes[1] + assert isinstance(bool_vdef, VarDef) + assert isinstance(bool_vdef.value, Expression) + assert isinstance(bool_vdef.value.right, LiteralValue) + assert isinstance(bool_vdef.value.right.value, bool) + assert bool_vdef.value.right.value == True assert block.address == 49152 sub2 = block.scope["calculate"] assert sub2 is sub - assert sub2.lineref == "src l. 18" + assert sub2.lineref == "src l. 19" all_scopes = list(result.all_scopes()) assert len(all_scopes) == 3 assert isinstance(all_scopes[0][0], Module) @@ -108,4 +122,16 @@ def test_parser(): stmt = list(all_scopes[2][0].scope.filter_nodes(Return)) assert len(stmt) == 1 assert isinstance(stmt[0], Return) - assert stmt[0].lineref == "src l. 19" + assert stmt[0].lineref == "src l. 20" + + +def test_block_nodes(): + sref = SourceRef("file", 1, 1) + sub1 = Subroutine(name="subaddr", param_spec=[], result_spec=[], address=0xc000, sourceref=sref) + sub2 = Subroutine(name="subblock", param_spec=[], result_spec=[], + scope=Scope(nodes=[Label(name="start", sourceref=sref)], sourceref=sref), sourceref=sref) + assert sub1.scope is None + assert sub1.nodes == [] + assert sub2.scope is not None + assert len(sub2.scope.nodes) > 0 + assert sub2.nodes is sub2.scope.nodes diff --git a/todo.ill b/todo.ill index 54f9748db..62bc5c082 100644 --- a/todo.ill +++ b/todo.ill @@ -10,14 +10,18 @@ %saveregisters true - const num = 2 - var var1 =2 - var .word wvar1 = 2 + foo() ; @todo constant check error + const num = 2 + max(2, 8, 3.44//3) + const pi_val = 22/7 - 2.23423 + var var1 =2 + 9/4 + var .word wvar2 = 2 + cos(23.2) + memory memvar = $d020 + var .word test2b = &memvar + var test3 = var1 start: - wvar1 = 2+foo() + wvar1 = 2+foo()+emptysub2 A=math.randbyte() A += c64.RASTER @@ -212,11 +216,11 @@ sub emptysub () -> () { %saveregisters %breakpoint - return + return 999990 + (2*sin(1.0)) + foo(), 999990 -1, 999999 } ~ { ;sdfsdf - return + return 999, -1, 3.445 ;sdfsdf }