comp

2024-11-29 17:50:35 +00:00 · 2018-01-07 19:14:21 +01:00 · 2018-01-07 19:14:21 +01:00 · 68c1d2af4c
commit 68c1d2af4c
parent 9b68722df3
9 changed files with 330 additions and 70 deletions
--- a/il65/exprparse.py
+++ b/il65/exprparse.py
@ -13,7 +13,7 @@ from .symbols import FLOAT_MAX_POSITIVE, FLOAT_MAX_NEGATIVE, SourceRef, SymbolTa


 class ParseError(Exception):
-    def __init__(self, message: str, sourcetext: str, sourceref: SourceRef) -> None:
+    def __init__(self, message: str, sourcetext: Optional[str], sourceref: SourceRef) -> None:
        self.sourceref = sourceref
        self.msg = message
        self.sourcetext = sourcetext
--- a/il65/lexer.py
+++ b/il65/lexer.py
@ -9,6 +9,8 @@ License: GNU GPL 3.0, see LICENSE
 import sys
 import ply.lex
 from .symbols import SourceRef
+from .parse import ParseError
+

 # token names

@ -218,7 +220,7 @@ def t_NAME(t):


 def t_DIRECTIVE(t):
-    r"%[a-z]+"
+    r"%[a-z]+\b"
    t.value = t.value[1:]
    return t

@ -284,13 +286,15 @@ def t_PRESERVEREGS(t):
 def t_ENDL(t):
    r"\n+"
    t.lexer.lineno += len(t.value)
+    t.value = "\n"
    return t    # end of lines are significant to the parser


 def t_error(t):
    line, col = t.lineno, find_tok_column(t)
-    sref = SourceRef("@todo-filename-f1", line, col)
-    t.lexer.error_function("{}: Illegal character '{:s}'", sref, t.value[0])
+    filename = getattr(t.lexer, "source_filename", "<unknown-file>")
+    sref = SourceRef(filename, line, col)
+    t.lexer.error_function("{}: Illegal character '{:s}'", sref, t.value[0], sourceref=sref)
    t.lexer.skip(1)


@ -300,8 +304,8 @@ def find_tok_column(token):
    return token.lexpos - last_cr


-def error_function(fmtstring, *args):
-    print("ERROR:", fmtstring.format(*args), file=sys.stderr)
+def error_function(fmtstring, *args, sourceref: SourceRef=None) -> None:
+    raise ParseError(fmtstring.format(*args), None, sourceref=sourceref)


 lexer = ply.lex.lex()
--- a/il65/parse.py
+++ b/il65/parse.py
@ -475,9 +475,9 @@ class Parser:
                    if self.result.format == ProgramFormat.PRG and self.result.with_sys and self.result.start_address != 0x0801:
                        raise self.PError("cannot use non-default 'address' when output format includes basic SYS program")
                    continue
-                elif directive == "preserve_registers":
+                elif directive == "saveregisters":
                    if preserve_specified:
-                        raise self.PError("can only specify preserve_registers option once")
+                        raise self.PError("can only specify saveregisters option once")
                    preserve_specified = True
                    _, _, optionstr = line.partition(" ")
                    self.result.preserve_registers = optionstr in ("", "true", "yes")
@ -649,7 +649,7 @@ class Parser:
            elif directive == "breakpoint":
                self.cur_block.statements.append(BreakpointStmt(self.sourceref))
                self.print_warning("breakpoint defined")
-            elif directive == "preserve_registers":
+            elif directive == "saveregisters":
                self.result.preserve_registers = optionstr in ("", "true", "yes")
            else:
                raise self.PError("invalid directive")
--- a/il65/plycompiler.py
+++ b/il65/plycompiler.py
@ -0,0 +1,153 @@
+import os
+import sys
+import linecache
+from typing import Optional, Generator, Tuple, Set
+from .plyparser import parse_file, Module, Directive, Block, Subroutine, AstNode
+from .parse import ParseError
+from .symbols import SourceRef
+
+
+class PlyParser:
+    def __init__(self):
+        self.parse_errors = 0
+        self.parsing_import = False
+
+    def parse_file(self, filename: str) -> Module:
+        print("parsing:", filename)
+        module = parse_file(filename)
+        try:
+            self.check_directives(module)
+            self.remove_empty_blocks(module)
+            self.process_imports(module)
+        except ParseError as x:
+            self.handle_parse_error(x)
+        return module
+
+    def remove_empty_blocks(self, module: Module) -> None:
+        # remove blocks without name and without address, or that are empty
+        for scope, parent in self.recurse_scopes(module):
+            if isinstance(scope, (Subroutine, Block)):
+                if not scope.scope:
+                    continue
+                if all(isinstance(n, Directive) for n in scope.scope.nodes):
+                    empty = True
+                    for n in scope.scope.nodes:
+                        empty = empty and n.name not in {"asmbinary", "asminclude"}
+                    if empty:
+                        self.print_warning("ignoring empty block or subroutine", scope.sourceref)
+                        assert isinstance(parent, (Block, Module))
+                        parent.scope.nodes.remove(scope)
+            if isinstance(scope, Block):
+                if not scope.name and scope.address is None:
+                    self.print_warning("ignoring block without name and address", scope.sourceref)
+                    assert isinstance(parent, Module)
+                    parent.scope.nodes.remove(scope)
+
+    def check_directives(self, module: Module) -> None:
+        for node, parent in self.recurse_scopes(module):
+            if isinstance(node, Module):
+                # check module-level directives
+                imports = set()  # type: Set[str]
+                for directive in node.scope.filter_nodes(Directive):
+                    if directive.name not in {"output", "zp", "address", "import", "saveregisters"}:
+                        raise ParseError("invalid directive in module", None, directive.sourceref)
+                    if directive.name == "import":
+                        if imports & set(directive.args):
+                            raise ParseError("duplicate import", None, directive.sourceref)
+                        imports |= set(directive.args)
+            if isinstance(node, (Block, Subroutine)):
+                # check block and subroutine-level directives
+                first_node = True
+                if not node.scope:
+                    continue
+                for sub_node in node.scope.nodes:
+                    if isinstance(sub_node, Directive):
+                        if sub_node.name not in {"asmbinary", "asminclude", "breakpoint", "saveregisters"}:
+                            raise ParseError("invalid directive in " + node.__class__.__name__.lower(), None, sub_node.sourceref)
+                        if sub_node.name == "saveregisters" and not first_node:
+                            raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref)
+                    first_node = False
+
+    def recurse_scopes(self, module: Module) -> Generator[Tuple[AstNode, AstNode], None, None]:
+        # generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
+        yield module, None
+        for block in list(module.scope.filter_nodes(Block)):
+            yield block, module
+            for subroutine in list(block.scope.filter_nodes(Subroutine)):
+                yield subroutine, block
+
+    def process_imports(self, module: Module) -> None:
+        # (recursively) imports the modules
+        imported = []
+        for directive in module.scope.filter_nodes(Directive):
+            if directive.name == "import":
+                if len(directive.args) < 1:
+                    raise ParseError("missing argument(s) for import directive", None, directive.sourceref)
+                for arg in directive.args:
+                    filename = self.find_import_file(arg, directive.sourceref.file)
+                    if not filename:
+                        raise ParseError("imported file not found", None, directive.sourceref)
+                    imported_module = self.import_file(filename)
+                    imported_module.scope.parent_scope = module.scope
+                    imported.append(imported_module)
+        # append the imported module's contents (blocks) at the end of the current module
+        for imported_module in imported:
+            for block in imported_module.scope.filter_nodes(Block):
+                module.scope.nodes.append(block)
+
+    def import_file(self, filename: str) -> Module:
+        sub_parser = PlyParser()
+        return sub_parser.parse_file(filename)
+
+    def find_import_file(self, modulename: str, sourcefile: str) -> Optional[str]:
+        filename_at_source_location = os.path.join(os.path.split(sourcefile)[0], modulename)
+        filename_at_libs_location = os.path.join(os.getcwd(), "lib", modulename)
+        candidates = [modulename,
+                      filename_at_source_location,
+                      filename_at_libs_location,
+                      modulename+".ill",
+                      filename_at_source_location+".ill",
+                      filename_at_libs_location+".ill"]
+        for filename in candidates:
+            if os.path.isfile(filename):
+                return filename
+        return None
+
+    def print_warning(self, text: str, sourceref: SourceRef=None) -> None:
+        if sourceref:
+            self.print_bold("warning: {}: {:s}".format(sourceref, text))
+        else:
+            self.print_bold("warning: " + text)
+
+    def print_bold(self, text: str) -> None:
+        if sys.stdout.isatty():
+            print("\x1b[1m" + text + "\x1b[0m", flush=True)
+        else:
+            print(text)
+
+    def handle_parse_error(self, exc: ParseError) -> None:
+        self.parse_errors += 1
+        if sys.stderr.isatty():
+            print("\x1b[1m", file=sys.stderr)
+        if self.parsing_import:
+            print("Error (in imported file):", str(exc), file=sys.stderr)
+        else:
+            print("Error:", str(exc), file=sys.stderr)
+        if exc.sourcetext is None:
+            exc.sourcetext = linecache.getline(exc.sourceref.file, exc.sourceref.line).rstrip()
+        if exc.sourcetext:
+            # remove leading whitespace
+            stripped = exc.sourcetext.lstrip()
+            num_spaces = len(exc.sourcetext) - len(stripped)
+            stripped = stripped.rstrip()
+            print("  " + stripped, file=sys.stderr)
+            if exc.sourceref.column:
+                print("  " + ' ' * (exc.sourceref.column - num_spaces) + '^', file=sys.stderr)
+        if sys.stderr.isatty():
+            print("\x1b[0m", file=sys.stderr, end="", flush=True)
+
+
+if __name__ == "__main__":
+    plyparser = PlyParser()
+    m = plyparser.parse_file(sys.argv[1])
+    print(str(m)[:400], "...")
--- a/il65/plyparser.py
+++ b/il65/plyparser.py
@ -8,7 +8,7 @@ License: GNU GPL 3.0, see LICENSE

 import attr
 from ply.yacc import yacc
-from typing import Union
+from typing import Union, Type, Generator
 from .symbols import SourceRef
 from .lexer import tokens, lexer, find_tok_column   # get the lexer tokens. required.

@ -48,70 +48,121 @@ class AstNode:
        tostr(self, 0)


-@attr.s(cmp=False)
-class Module(AstNode):
-    nodes = attr.ib(type=list)
-
-
-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Directive(AstNode):
    name = attr.ib(type=str)
    args = attr.ib(type=list, default=attr.Factory(list))


-@attr.s(cmp=False)
+@attr.s(cmp=False, slots=True, repr=False)
 class Scope(AstNode):
    nodes = attr.ib(type=list)
+    symbols = attr.ib(init=False)
+    name = attr.ib(init=False)          # will be set by enclosing block, or subroutine etc.
+    parent_scope = attr.ib(init=False, default=None)  # will be wired up later
+    save_registers = attr.ib(type=bool, default=False, init=False)    # XXX will be set later
+
+    def __attrs_post_init__(self):
+        # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"]
+        self.symbols = {}
+        for node in self.nodes:
+            if isinstance(node, (Label, VarDef)):
+                self.symbols[node.name] = node
+            if isinstance(node, Subroutine):
+                self.symbols[node.name] = node
+                if node.scope is not None:
+                    node.scope.parent_scope = self
+            if isinstance(node, Block):
+                if node.name:
+                    self.symbols[node.name] = node
+                    node.scope.parent_scope = self
+
+    def __getitem__(self, name: str) -> AstNode:
+        if '.' in name:
+            # look up the dotted name starting from the topmost scope
+            scope = self
+            while scope.parent_scope:
+                scope = scope.parent_scope
+            for namepart in name.split('.'):
+                if isinstance(scope, (Block, Subroutine)):
+                    scope = scope.scope
+                if not isinstance(scope, Scope):
+                    raise LookupError("undefined symbol: " + name)
+                scope = scope.symbols.get(namepart, None)
+                if scope is None:
+                    raise LookupError("undefined symbol: " + name)
+            return scope
+        else:
+            # find the name in nested scope hierarchy
+            if name in self.symbols:
+                return self.symbols[name]
+            if self.parent_scope:
+                return self.parent_scope[name]
+            raise LookupError("undefined symbol: " + name)
+
+    def filter_nodes(self, nodetype) -> Generator[AstNode, None, None]:
+        for node in self.nodes:
+            if isinstance(node, nodetype):
+                yield node


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
+class Module(AstNode):
+    name = attr.ib(type=str)     # filename
+    scope = attr.ib(type=Scope)
+
+
+@attr.s(cmp=False, repr=False)
 class Block(AstNode):
    scope = attr.ib(type=Scope)
    name = attr.ib(type=str, default=None)
    address = attr.ib(type=int, default=None)

+    def __attrs_post_init__(self):
+        self.scope.name = self.name

-@attr.s(cmp=False)
+
+@attr.s(cmp=False, repr=False)
 class Label(AstNode):
    name = attr.ib(type=str)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Register(AstNode):
    name = attr.ib(type=str)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class PreserveRegs(AstNode):
    registers = attr.ib(type=str)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Assignment(AstNode):
    left = attr.ib()     # type: Union[str, TargetRegisters, Dereference]
    right = attr.ib()


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class AugAssignment(Assignment):
    operator = attr.ib(type=str)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class SubCall(AstNode):
    target = attr.ib()
    preserve_regs = attr.ib()
    arguments = attr.ib()


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Return(AstNode):
    value_A = attr.ib(default=None)
    value_X = attr.ib(default=None)
    value_Y = attr.ib(default=None)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class TargetRegisters(AstNode):
    registers = attr.ib(type=list)

@ -119,12 +170,12 @@ class TargetRegisters(AstNode):
        self.registers.append(register)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class InlineAssembly(AstNode):
    assembly = attr.ib(type=str)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class VarDef(AstNode):
    name = attr.ib(type=str)
    vartype = attr.ib()
@ -132,13 +183,13 @@ class VarDef(AstNode):
    value = attr.ib(default=None)


-@attr.s(cmp=False, slots=True)
+@attr.s(cmp=False, slots=True, repr=False)
 class Datatype(AstNode):
    name = attr.ib(type=str)
    dimension = attr.ib(type=list, default=None)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Subroutine(AstNode):
    name = attr.ib(type=str)
    param_spec = attr.ib()
@ -149,40 +200,42 @@ class Subroutine(AstNode):
    def __attrs_post_init__(self):
        if self.scope is not None and self.address is not None:
            raise ValueError("subroutine must have either a scope or an address, not both")
+        if self.scope is not None:
+            self.scope.name = self.name


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Goto(AstNode):
    target = attr.ib()
    if_stmt = attr.ib(default=None)
    condition = attr.ib(default=None)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class Dereference(AstNode):
    location = attr.ib()
    datatype = attr.ib()


-@attr.s(cmp=False, slots=True)
+@attr.s(cmp=False, slots=True, repr=False)
 class CallTarget(AstNode):
    target = attr.ib()
    address_of = attr.ib(type=bool)


-@attr.s(cmp=False, slots=True)
+@attr.s(cmp=False, slots=True, repr=False)
 class CallArgument(AstNode):
    value = attr.ib()
    name = attr.ib(type=str, default=None)


-@attr.s(cmp=False)
+@attr.s(cmp=False, repr=False)
 class UnaryOp(AstNode):
    operator = attr.ib(type=str)
    operand = attr.ib()


-@attr.s(cmp=False, slots=True)
+@attr.s(cmp=False, slots=True, repr=False)
 class Expression(AstNode):
    left = attr.ib()
    operator = attr.ib(type=str)
@ -195,7 +248,13 @@ def p_start(p):
          |  module_elements
    """
    if p[1]:
-        p[0] = Module(nodes=p[1], sourceref=_token_sref(p, 1))
+        scope = Scope(nodes=p[1], sourceref=_token_sref(p, 1))
+        scope.name = "<" + p.lexer.source_filename + " global scope>"
+        p[0] = Module(name=p.lexer.source_filename, scope=scope, sourceref=_token_sref(p, 1))
+    else:
+        scope = Scope(nodes=[], sourceref=_token_sref(p, 1))
+        scope.name = "<" + p.lexer.source_filename + " global scope>"
+        p[0] = Module(name=p.lexer.source_filename, scope=scope, sourceref=SourceRef(lexer.source_filename, 1, 1))


 def p_module(p):
@ -215,6 +274,7 @@ def p_module_elt(p):
               |  directive
               |  block
    """
+    if p[1] != '\n':
        p[0] = p[1]


@ -245,6 +305,7 @@ def p_directive_arg(p):
    directive_arg :  NAME
                  |  INTEGER
                  |  STRING
+                  |  BOOLEAN
    """
    p[0] = p[1]

@ -299,7 +360,10 @@ def p_scope_elements(p):
                   |  scope_elements  scope_element
    """
    if len(p) == 2:
-        p[0] = [p[1]]
+        p[0] = [] if p[1] in (None, '\n') else [p[1]]
+    else:
+        if p[2] in (None, '\n'):
+            p[0] = p[1]
        else:
            p[0] = p[1] + [p[2]]

@ -314,7 +378,10 @@ def p_scope_element(p):
                  |  inlineasm
                  |  statement
    """
+    if p[1] != '\n':
        p[0] = p[1]
+    else:
+        p[0] = None


 def p_label(p):
@ -729,17 +796,18 @@ def p_empty(p):


 def p_error(p):
+    stack_state_str = '  '.join([symbol.type for symbol in parser.symstack][1:])
+    print('\n[ERROR DEBUG: parser state={:d} stack: {} . {} ]'.format(parser.state, stack_state_str, p))
    if p:
        sref = SourceRef(p.lexer.source_filename, p.lineno, find_tok_column(p))
-        p.lexer.error_function("{}: before '{:.20s}' ({})", sref, str(p.value), repr(p))
+        p.lexer.error_function("syntax error before '{:.20s}'", str(p.value), sourceref=sref)
    else:
-        lexer.error_function("{}: at end of input", "@todo-filename3")
+        lexer.error_function("syntax error at end of input", lexer.source_filename, sourceref=None)


 def _token_sref(p, token_idx):
    """ Returns the coordinates for the YaccProduction object 'p' indexed
-        with 'token_idx'. The coordinate includes the 'lineno' and
-        'column'. Both follow the lex semantic, starting from 1.
+        with 'token_idx'. The coordinate includes the 'lineno' and 'column', starting from 1.
    """
    last_cr = p.lexer.lexdata.rfind('\n', 0, p.lexpos(token_idx))
    if last_cr < 0:
@ -772,12 +840,10 @@ class TokenFilter:
 parser = yacc(write_tables=True)


-if __name__ == "__main__":
-    import sys
-    file = sys.stdin  # open(sys.argv[1], "rU")
-    lexer.source_filename = "derp"
-    tokenfilter = TokenFilter(lexer)
-    result = parser.parse(input=file.read(),
-                          tokenfunc=tokenfilter.token) or Module(None, SourceRef(lexer.source_filename, 1, 1))
-    print("RESULT:")
-    result.print_tree()
+def parse_file(filename: str) -> Module:
+    lexer.lineno = 1
+    lexer.source_filename = filename
+    tfilter = TokenFilter(lexer)
+    with open(filename, "rU") as inf:
+        sourcecode = inf.read()
+    return parser.parse(input=sourcecode, tokenfunc=tfilter.token)
--- a/il65/symbols.py
+++ b/il65/symbols.py
@ -710,7 +710,8 @@ class AstNode:
            try:
                variables = vars(node).items()
            except TypeError:
-                variables = {}
+                pass
+            else:
                for name, value in variables:
                    if isinstance(value, AstNode):
                        tostr(value, level + 1)
--- a/lib/mathlib.ill
+++ b/lib/mathlib.ill
@ -59,7 +59,6 @@ _m_with_add	stx  SCRATCH_ZP1
 	}
 }

-
 sub  multiply_bytes_addA_16  (byte1: X, byte2: Y, add: A) -> (A?, XY)  {
 	; ---- multiply 2 bytes and add A, result as word in X/Y (unsigned)
 	%asm {
--- a/reference.md
+++ b/reference.md
@ -400,7 +400,7 @@ Normally, the registers are NOT preserved when calling a subroutine or when a ce
 operations are performed. Most calls will be simply a few instructions to load the
 values in the registers and then a JSR or JMP.

-By using the ``%preserve_registers`` directive (globally or in a block) you can tell the
+By using the ``%saveregisters`` directive (globally or in a block) you can tell the
 compiler to preserve all registers. This does generate a lot of extra code that puts
 original values on the stack and gets them off the stack again once the subroutine is done.
 In this case however you don't have to worry about A, X and Y losing their original values
--- a/todo.ill
+++ b/todo.ill
@ -4,12 +4,17 @@

 ~ main $4444 {

+	%saveregisters true, false
+

                const   num = 2
                var     var1  =2
                var .word  wvar1 = 2

+
 start:
+
+
        A=math.randbyte()
        A +=  c64.RASTER
        A-=c64.TIME_LO
@ -161,6 +166,38 @@ loop   :
        c64scr.print_word_decimal(1222)
        c64.CHROUT('\n')

+	%breakpoint
+
+        return
+
+sub sub1 () -> () {
+
+	%saveregisters off
+	%breakpoint
+
+label:
 	return

 }
+
+sub emptysub () -> () {
+
+	%saveregisters on
+
+}
+
+}
+
+~ zzzz {
+
+	%saveregisters
+	%breakpoint
+
+	return
+
+}
+~  {
+	;sdfsdf
+	return
+	;sdfsdf
+}