optimize

2024-07-05 06:29:02 +00:00 · 2018-01-08 01:51:36 +01:00 · 2018-01-08 01:51:36 +01:00 · 7d8c2bf161
commit 7d8c2bf161
parent 12c3ddd207
6 changed files with 302 additions and 57 deletions
--- a/il65/compiler.py
+++ b/il65/compiler.py
@ -1,9 +1,20 @@
 """
 Programming Language for 6502/6510 microprocessors
 This is the compiler of the IL65 code, that prepares the parse tree for code generation.
 Written by Irmen de Jong (irmen@razorvine.net)
 License: GNU GPL 3.0, see LICENSE
 """
 import re
 import os
 import sys
 import linecache
-from typing import Optional, Generator, Tuple, Set
+from typing import Optional, Tuple, Set, Dict, Any, List
-from .plyparser import parse_file, Module, Directive, Block, Subroutine, AstNode
+from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
-from .plylexer import SourceRef
+    SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters
 from .plylexer import SourceRef, print_bold
 from .optimizer import optimize
 class ParseError(Exception):
@ -26,41 +37,138 @@ class PlyParser:
        module = parse_file(filename, self.lexer_error)
        try:
            self.check_directives(module)
            self.remove_empty_blocks(module)
            self.process_imports(module)
            self.create_multiassigns(module)
            if not self.parsing_import:
                self.determine_subroutine_usage(module)
        except ParseError as x:
            self.handle_parse_error(x)
        if self.parse_errors:
-            self.print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors))
+            print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors))
            raise SystemExit(1)
        return module
    def lexer_error(self, sourceref: SourceRef, fmtstring: str, *args: str) -> None:
        self.parse_errors += 1
-        self.print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
+        print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
-    def remove_empty_blocks(self, module: Module) -> None:
+    def create_multiassigns(self, module: Module) -> None:
-        # remove blocks without name and without address, or that are empty
+        # create multi-assign statements from nested assignments (A=B=C=5),
-        for scope, parent in self.recurse_scopes(module):
+        # and optimize TargetRegisters down to single Register if it's just one register.
-            if isinstance(scope, (Subroutine, Block)):
+        def simplify_targetregisters(targets: List[Any]) -> List[Any]:
-                if not scope.scope:
+            new_targets = []
-                    continue
+            for t in targets:
-                if all(isinstance(n, Directive) for n in scope.scope.nodes):
+                if isinstance(t, TargetRegisters) and len(t.registers) == 1:
-                    empty = True
+                    t = t.registers[0]
-                    for n in scope.scope.nodes:
+                new_targets.append(t)
-                        empty = empty and n.name not in {"asmbinary", "asminclude"}
+            return new_targets
-                    if empty:
+
-                        self.print_warning("ignoring empty block or subroutine", scope.sourceref)
+        def reduce_right(assign: Assignment) -> Assignment:
-                        assert isinstance(parent, (Block, Module))
+            if isinstance(assign.right, Assignment):
-                        parent.scope.nodes.remove(scope)
+                right = reduce_right(assign.right)
-            if isinstance(scope, Block):
+                targets = simplify_targetregisters(right.left)
-                if not scope.name and scope.address is None:
+                assign.left.extend(targets)
-                    self.print_warning("ignoring block without name and address", scope.sourceref)
+                assign.right = right.right
-                    assert isinstance(parent, Module)
+            return assign
-                    parent.scope.nodes.remove(scope)
+
        for mnode, parent in module.all_scopes():
            if mnode.scope:
                for node in mnode.scope.nodes:
                    if isinstance(node, Assignment):
                        node.left = simplify_targetregisters(node.left)
                        if isinstance(node.right, Assignment):
                            multi = reduce_right(node)
                            assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment)
    def determine_subroutine_usage(self, module: Module) -> None:
        """Rebuild module.subroutine_usage from the statements in every scope.

        The usage table is cleared first. Then each statement found in the scopes
        yielded by module.all_scopes() is handed to the parser for its node type;
        statements of any other type are ignored.
        """
        module.subroutine_usage.clear()
        # checked in this order; the first matching type handles the statement
        dispatch = (
            (InlineAssembly, self._parse_asm_for_subroutine_usage),
            (SubCall, self._parse_subcall_for_subroutine_usages),
            (Goto, self._parse_goto_for_subroutine_usages),
            (Return, self._parse_return_for_subroutine_usages),
            (Assignment, self._parse_assignment_for_subroutine_usages),
        )
        for owner, _parent in module.all_scopes():
            if not owner.scope:
                continue
            for statement in owner.scope.nodes:
                for nodetype, handler in dispatch:
                    if isinstance(statement, nodetype):
                        handler(module.subroutine_usage, statement, owner.scope)
                        break
    def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                             subcall: SubCall, parent_scope: Scope) -> None:
        """Record the usage made by a subroutine call, and by its argument expressions.

        Only a call target that is a symbol name (a str) is recorded. A name with
        exactly one dot is split into (scope name, symbol name); any other name is
        recorded under the name of parent_scope. Every argument value is then
        scanned as an expression.
        """
        target = subcall.target.target
        if isinstance(target, str):
            pieces = target.split('.')
            if len(pieces) == 2:
                scopename, name = pieces
            else:
                # no dot (or more than one): treat it as a name local to this scope
                scopename, name = parent_scope.name, target
            usages[(scopename, name)].add(str(subcall.sourceref))
        for argument in subcall.arguments:
            self._parse_expression_for_subroutine_usages(usages, argument.value, parent_scope)
    def _parse_expression_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                                expr: Any, parent_scope: Scope) -> None:
        """Walk an expression and record the subroutine calls found inside it.

        None, plain values (int, str, float, bool) and Register nodes contribute
        nothing. A SubCall is handed to the subcall parser, and both operands of an
        Expression are walked recursively. Anything else is only reported on stdout.
        """
        if expr is None or isinstance(expr, (int, str, float, bool, Register)):
            return
        if isinstance(expr, SubCall):
            self._parse_subcall_for_subroutine_usages(usages, expr, parent_scope)
            return
        if isinstance(expr, Expression):
            self._parse_expression_for_subroutine_usages(usages, expr.left, parent_scope)
            self._parse_expression_for_subroutine_usages(usages, expr.right, parent_scope)
            return
        # node type that is not handled yet
        print("@todo parse expression for subroutine usage:", expr)    # @todo
    def _parse_goto_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                          goto: Goto, parent_scope: Scope) -> None:
        """Record the usage made by a goto statement and by its condition expression.

        A goto target that is a symbol name (a str) is looked up in parent_scope.
        If it resolves to a Subroutine, the usage is recorded under the name of
        parent_scope.
        """
        target_name = goto.target.target
        if isinstance(target_name, str):
            try:
                resolved = parent_scope[target_name]
            except LookupError:
                # unknown symbol: nothing is recorded and the condition is not scanned either
                return
            if isinstance(resolved, Subroutine):
                key = (parent_scope.name, resolved.name)
                usages[key].add(str(goto.sourceref))
        self._parse_expression_for_subroutine_usages(usages, goto.condition, parent_scope)
    def _parse_return_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                            returnnode: Return, parent_scope: Scope) -> None:
        """Scan the three return value expressions (value_A, value_X, value_Y), in that order."""
        for attribute in ("value_A", "value_X", "value_Y"):
            value = getattr(returnnode, attribute)
            self._parse_expression_for_subroutine_usages(usages, value, parent_scope)
    def _parse_assignment_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                                assignment: Assignment, parent_scope: Scope) -> None:
        """Scan the value that is finally assigned for subroutine usages.

        The right-hand side can itself be an Assignment (nested assignments such as
        A=B=C=5). The chain is followed until the innermost right-hand side, which
        is then scanned as an expression.
        """
        value = assignment.right
        while isinstance(value, Assignment):
            value = value.right
        self._parse_expression_for_subroutine_usages(usages, value, parent_scope)
    def _parse_asm_for_subroutine_usage(self, usages: Dict[Tuple[str, str], Set[str]],
                                        asmnode: InlineAssembly, parent_scope: Scope) -> None:
        """Record the subroutines that an inline assembly block refers to by name.

        Each assembly line is split on its first run of whitespace and only the
        remainder is scanned for symbol-like words. Words starting with '$' are
        skipped. Words that resolve to a Subroutine in parent_scope are recorded
        under the name of parent_scope.
        """
        for asm_line in asmnode.assembly.splitlines():
            fields = asm_line.split(maxsplit=1)
            if len(fields) != 2:
                continue    # nothing follows the first word on this line
            for found in re.finditer(r"(?P<symbol>[a-zA-Z_$][a-zA-Z0-9_\.]+)", fields[1]):
                name = found.group("symbol")
                if name.startswith('$'):
                    continue
                try:
                    resolved = parent_scope[name]
                except LookupError:
                    continue    # not a symbol known in this scope
                if isinstance(resolved, Subroutine):
                    usages[(parent_scope.name, resolved.name)].add(str(asmnode.sourceref))
    def check_directives(self, module: Module) -> None:
-        for node, parent in self.recurse_scopes(module):
+        for node, parent in module.all_scopes():
            if isinstance(node, Module):
                # check module-level directives
                imports = set()  # type: Set[str]
@ -84,14 +192,6 @@ class PlyParser:
                            raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref)
                    first_node = False
    def recurse_scopes(self, module: Module) -> Generator[Tuple[AstNode, AstNode], None, None]:
        # generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
        yield module, None
        for block in list(module.scope.filter_nodes(Block)):
            yield block, module
            for subroutine in list(block.scope.filter_nodes(Subroutine)):
                yield subroutine, block
    def process_imports(self, module: Module) -> None:
        # (recursively) imports the modules
        imported = []
@ -141,18 +241,6 @@ class PlyParser:
                return filename
        return None
    def print_warning(self, text: str, sourceref: SourceRef=None) -> None:
        if sourceref:
            self.print_bold("warning: {}: {:s}".format(sourceref, text))
        else:
            self.print_bold("warning: " + text)
    def print_bold(self, text: str) -> None:
        if sys.stdout.isatty():
            print("\x1b[1m" + text + "\x1b[0m", flush=True)
        else:
            print(text)
    def handle_parse_error(self, exc: ParseError) -> None:
        self.parse_errors += 1
        if sys.stderr.isatty():
@ -176,6 +264,9 @@ class PlyParser:
 if __name__ == "__main__":
    description = "Compiler for IL65 language, code name 'Sick'"
    print("\n" + description)
    plyparser = PlyParser()
    m = plyparser.parse_file(sys.argv[1])
-    print(str(m)[:400], "...")
+    optimize(m)
    print()
--- a/il65/handwritten/optimize.py
+++ b/il65/handwritten/optimize.py
@ -131,7 +131,7 @@ class Optimizer:
        for sub in list(block.symbols.iter_subroutines()):
            usages = self.parsed.subroutine_usage[(sub.blockname, sub.name)]
            if not usages and sub.blockname + '.' + sub.name not in never_remove:
-                block.symbols.discard_sub(sub.name)
+                block.symbols.remove_node(sub.name)
                discarded.append(sub.name)
        if discarded:
            print("{}: discarded {:d} unused subroutines from block '{:s}'".format(block.sourceref, len(discarded), block.name))
--- a/il65/optimizer.py
+++ b/il65/optimizer.py
@ -0,0 +1,99 @@
 """
 Programming Language for 6502/6510 microprocessors
 This is the code to optimize the parse tree.
 Written by Irmen de Jong (irmen@razorvine.net)
 License: GNU GPL 3.0, see LICENSE
 """
 from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment
 from .plylexer import print_warning, print_bold
 class Optimizer:
    """Runs clean-up passes over a parsed module, modifying its tree in place.

    num_warnings counts the warnings printed during the most recent optimize() run.
    """

    def __init__(self, mod: Module) -> None:
        self.num_warnings = 0
        self.module = mod

    def optimize(self) -> None:
        """Run all enabled passes in a fixed order, after resetting num_warnings."""
        self.num_warnings = 0
        # self.remove_augmentedassign_incrdecr_nops(block)   # @todo
        self.remove_useless_assigns()
        # self.combine_assignments_into_multi(block)   # @todo
        self.optimize_multiassigns()
        self.remove_unused_subroutines()
        # self.optimize_compare_with_zero(block)  # @todo
        self.remove_empty_blocks()

    def remove_useless_assigns(self) -> None:
        """Remove assignment targets that equal the assigned value (A=A).

        An assignment that has no targets left afterwards is removed from its
        scope, and a warning is printed for it.
        """
        for mnode, parent in self.module.all_scopes():
            if mnode.scope:
                # iterate over a copy, because statements are removed from the scope while looping
                for assignment in list(mnode.scope.nodes):
                    if isinstance(assignment, Assignment):
                        assignment.left = [lv for lv in assignment.left if lv != assignment.right]
                        if not assignment.left:
                            mnode.scope.remove_node(assignment)
                            self.num_warnings += 1
                            print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref))

    def optimize_multiassigns(self) -> None:
        """Remove duplicate targets from multi-assign statements.

        The first occurrence of every target is kept and the order in which the
        targets were written is preserved, so the result does not depend on the
        iteration order of a set. A warning is printed when duplicates were dropped.
        """
        for mnode, parent in self.module.all_scopes():
            if mnode.scope:
                for assignment in mnode.scope.nodes:
                    if isinstance(assignment, Assignment) and len(assignment.left) > 1:
                        # remove duplicates, keeping the first occurrence of each target
                        seen = set()
                        lvalues = []
                        for target in assignment.left:
                            if target not in seen:
                                seen.add(target)
                                lvalues.append(target)
                        if len(lvalues) != len(assignment.left):
                            self.num_warnings += 1
                            print_warning("{}: removed duplicate assignment targets".format(assignment.sourceref))
                        # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)
                        assignment.left = lvalues

    def remove_unused_subroutines(self) -> None:
        """Remove subroutines that have no recorded usage in module.subroutine_usage.

        Subroutines listed in never_remove are always kept. The number of
        discarded subroutines is printed.
        """
        # some symbols are used by the emitted assembly code from the code generator,
        # and should never be removed or the assembler will fail
        never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB",
                        "c64flt.GIVUAYF", "c64flt.copy_mflt", "c64flt.float_add_one", "c64flt.float_sub_one",
                        "c64flt.float_add_SW1_to_XY", "c64flt.float_sub_SW1_from_XY"}
        num_discarded = 0
        for sub, parent in self.module.all_scopes():
            if isinstance(sub, Subroutine):
                usages = self.module.subroutine_usage[(parent.name, sub.name)]
                if not usages and parent.name + '.' + sub.name not in never_remove:
                    parent.scope.remove_node(sub)
                    num_discarded += 1
        print("discarded {:d} unused subroutines".format(num_discarded))

    def remove_empty_blocks(self) -> None:
        """Remove empty blocks/subroutines, and blocks without name and address.

        A block or subroutine counts as empty when its scope holds nothing but
        directives, none of which is asmbinary or asminclude. Each node is removed
        at most once: a block that is both empty and nameless is reported as empty
        and not looked at again.
        """
        for node, parent in self.module.all_scopes():
            if isinstance(node, (Subroutine, Block)):
                if not node.scope:
                    continue
                if all(isinstance(n, Directive) for n in node.scope.nodes):
                    empty = all(n.name not in {"asmbinary", "asminclude"} for n in node.scope.nodes)
                    if empty:
                        self.num_warnings += 1
                        print_warning("ignoring empty block or subroutine", node.sourceref)
                        assert isinstance(parent, (Block, Module))
                        parent.scope.nodes.remove(node)
                        # already removed; a second remove() below would raise ValueError
                        continue
            if isinstance(node, Block):
                if not node.name and node.address is None:
                    self.num_warnings += 1
                    print_warning("ignoring block without name and address", node.sourceref)
                    assert isinstance(parent, Module)
                    parent.scope.nodes.remove(node)
 def optimize(mod: Module) -> None:
    """Optimize the given module in place and print a summary line if there were warnings."""
    optimizer = Optimizer(mod)
    optimizer.optimize()
    if not optimizer.num_warnings:
        return
    if optimizer.num_warnings == 1:
        summary = "there is one optimization warning."
    else:
        summary = "there are {:d} optimization warnings.".format(optimizer.num_warnings)
    print_bold(summary)
--- a/il65/plylexer.py
+++ b/il65/plylexer.py
@ -147,11 +147,10 @@ reserved = {
 # rules for tokens with some actions
 def t_inlineasm(t):
-    r"%asm\s*\{\s*"
+    r"%asm\s*\{[^\S\n]*"
    t.lexer.code_start = t.lexer.lexpos     # Record start position
    t.lexer.level = 1                       # initial brace level
    t.lexer.begin("inlineasm")             # enter state 'inlineasm'
    t.lexer.lineno += 1
 def t_inlineasm_lbrace(t):
@ -320,6 +319,20 @@ def find_tok_column(token):
    return token.lexpos - last_cr
 def print_warning(text: str, sourceref: SourceRef = None) -> None:
    """Print a warning via print_bold, prefixed with the source reference when one is given."""
    if not sourceref:
        print_bold("warning: " + text)
        return
    print_bold("warning: {}: {:s}".format(sourceref, text))
 def print_bold(text: str) -> None:
    """Print text to stdout, wrapped in bold escape codes when stdout is a terminal."""
    if not sys.stdout.isatty():
        print(text)
        return
    # the terminal case also flushes right away, the plain case does not
    print("\x1b[1m" + text + "\x1b[0m", flush=True)
 lexer = ply.lex.lex()
--- a/il65/plyparser.py
+++ b/il65/plyparser.py
@ -6,9 +6,10 @@ Written by Irmen de Jong (irmen@razorvine.net)
 License: GNU GPL 3.0, see LICENSE
 """
 from collections import defaultdict
 import attr
 from ply.yacc import yacc
-from typing import Union, Generator
+from typing import Union, Generator, Tuple, List
 from .plylexer import SourceRef, tokens, lexer, find_tok_column
@ -104,11 +105,26 @@ class Scope(AstNode):
            if isinstance(node, nodetype):
                yield node
    def remove_node(self, node: AstNode) -> None:
        """Remove node from this scope's node list, and its symbol entry if it has a name attribute."""
        try:
            symbol_name = node.name
        except AttributeError:
            pass    # node without a name attribute: it has no symbol entry to drop
        else:
            del self.symbols[symbol_name]
        self.nodes.remove(node)
@attr.s(cmp=False, repr=False)
 class Module(AstNode):
    """Root node of a parsed source file: holds the top-level scope and the subroutine usage table."""
    name = attr.ib(type=str)     # filename
    scope = attr.ib(type=Scope)
    # not an __init__ argument; starts out as an empty defaultdict(set) and is populated later
    subroutine_usage = attr.ib(type=defaultdict, init=False, default=attr.Factory(lambda: defaultdict(set)))    # will be populated later
    def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]:
        """Yield (node, parent_node) for the module, each of its blocks, and each block's subroutines."""
        # generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
        # it iterates over copies of the node collections, so it's okay to modify the scopes you iterate over.
        yield self, None    # the module itself has no parent
        for block in list(self.scope.filter_nodes(Block)):
            yield block, self
            # this copy is made after the block was yielded, so it sees changes the consumer made to the block
            for subroutine in list(block.scope.filter_nodes(Subroutine)):
                yield subroutine, block
@attr.s(cmp=False, repr=False)
@ -130,6 +146,19 @@ class Label(AstNode):
 class Register(AstNode):
    """AST node for a register, identified by its name; hashing, equality and ordering use only the name."""
    name = attr.ib(type=str)
    def __hash__(self) -> int:
        # hash on the name, consistent with __eq__ below
        return hash(self.name)
    def __eq__(self, other) -> bool:
        if not isinstance(other, Register):
            # not comparable here; lets Python fall back to the other operand's comparison
            return NotImplemented
        return self.name == other.name
    def __lt__(self, other) -> bool:
        if not isinstance(other, Register):
            # not comparable here; lets Python fall back to the other operand's comparison
            return NotImplemented
        return self.name < other.name
@attr.s(cmp=False, repr=False)
 class PreserveRegs(AstNode):
@ -138,13 +167,16 @@ class PreserveRegs(AstNode):
@attr.s(cmp=False, repr=False)
 class Assignment(AstNode):
-    left = attr.ib()     # type: Union[str, TargetRegisters, Dereference]
+    # can be single- or multi-assignment
    left = attr.ib(type=list)     # type: List[Union[str, TargetRegisters, Dereference]]
    right = attr.ib()
@attr.s(cmp=False, repr=False)
-class AugAssignment(Assignment):
+class AugAssignment(AstNode):
    left = attr.ib()
    operator = attr.ib(type=str)
    right = attr.ib()
@attr.s(cmp=False, repr=False)
@ -153,6 +185,9 @@ class SubCall(AstNode):
    preserve_regs = attr.ib()
    arguments = attr.ib()
    def __attrs_post_init__(self):
        self.arguments = self.arguments or []
@attr.s(cmp=False, repr=False)
 class Return(AstNode):
@ -689,14 +724,14 @@ def p_assignment(p):
    assignment :  assignment_target  IS  expression
               |  assignment_target  IS  assignment
    """
-    p[0] = Assignment(left=p[1], right=p[3], sourceref=_token_sref(p, 1))
+    p[0] = Assignment(left=[p[1]], right=p[3], sourceref=_token_sref(p, 2))
 def p_aug_assignment(p):
    """
    aug_assignment :  assignment_target  AUGASSIGN  expression
    """
-    p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
+    p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
 precedence = (
@ -721,7 +756,7 @@ def p_expression(p):
               |  expression  EQUALS  expression
               |  expression  NOTEQUALS  expression
    """
-    p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
+    p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
 def p_expression_uminus(p):
--- a/todo.ill
+++ b/todo.ill
@ -24,6 +24,12 @@ start:
        c64.CHROUT('\n')
        return
 	screen = border =  cursor = X = Y = A = X = Y = A = border = cursor = border = cursor = 66  ; multi-assign!
 	X = Y = A = X = Y = A = X = Y = A = X = Y = AX = Y = A = X = AY = XY =A = 123  ; multi-assign!
 	XY = XY
 	A= A
 	A=X=Y=A
 rndloop:
        XY = math.randword()
@ -126,6 +132,7 @@ rndloop:
                bne  -
        }
 loop   :
        A=c64.GETIN()
        if_not goto loop
@ -172,9 +179,9 @@ loop   :
 sub sub1 () -> () {
 	%breakpoint
 	%saveregisters off
 	%breakpoint
 	%breakpoint
 label:
 	return