Merge pull request #16 from catseye/goto-at-end-of-block

goto only at end of block, not only in tail position
2024-06-07 22:29:27 +00:00 · 2018-12-11 16:51:44 +00:00 · 2018-12-11 16:51:44 +00:00 · 70ba40bf3e
commit 70ba40bf3e
parent f99cf47661 bf3f1835ed
6 changed files with 483 additions and 103 deletions
--- a/HISTORY.md
+++ b/HISTORY.md
@ -4,6 +4,9 @@ History of SixtyPical
 0.18
 ----

+*   Syntactically, `goto` may only appear at the end of a block.
+    It need no longer be the final instruction in a routine,
+    as long as the type context is consistent at every exit.
 *   `cmp` instruction can now perform a 16-bit unsigned comparison
    of `word` memory locations (at the cost of trashing `a`.)
 *   Fixed pathological memory use in the lexical scanner - should
--- a/TODO.md
+++ b/TODO.md
@ -55,13 +55,9 @@ error.

 ### Tail-call optimization

-More generally, define a block as having zero or one `goto`s at the end.  (and `goto`s cannot
-appear elsewhere.)
-
 If a block ends in a `call` can that be converted to end in a `goto`?  Why not?  I think it can,
 if the block is in tail position.  The constraints should iron out the same both ways.

-And - once we have this - why do we need `goto` to be in tail position, strictly?
 As long as the routine has consistent type context every place it exits, that should be fine.

 ### "Include" directives
--- a/src/sixtypical/analyzer.py
+++ b/src/sixtypical/analyzer.py
@ -34,6 +34,11 @@ class InconsistentInitializationError(StaticAnalysisError):
    pass


+class InconsistentExitError(StaticAnalysisError):
+    """The type context differs at two different exit points of the routine."""
+    pass
+
+
 class ForbiddenWriteError(StaticAnalysisError):
    pass

@ -46,6 +51,12 @@ class IllegalJumpError(StaticAnalysisError):
    pass


+class TerminatedContextError(StaticAnalysisError):
+    """What the program is doing here is not valid, due to preceding `goto`s,
+    which make this dead code."""
+    pass
+
+
 class RangeExceededError(StaticAnalysisError):
    pass

@ -101,6 +112,7 @@ class Context(object):
        self._touched = set()
        self._range = dict()
        self._writeable = set()
+        self._terminated = False
        self._gotos_encountered = set()

        for ref in inputs:
@ -132,6 +144,15 @@ class Context(object):
        c._writeable = set(self._writeable)
        return c

+    def update_from(self, other):
+        self.routines = other.routines
+        self.routine = other.routine
+        self._touched = set(other._touched)
+        self._range = dict(other._range)
+        self._writeable = set(other._writeable)
+        self._terminated = other._terminated
+        self._gotos_encountered = set(other._gotos_encountered)
+
    def each_meaningful(self):
        for ref in self._range.keys():
            yield ref
@ -140,6 +161,10 @@ class Context(object):
        for ref in self._touched:
            yield ref

+    def each_writeable(self):
+        for ref in self._writeable:
+            yield ref
+
    def assert_meaningful(self, *refs, **kwargs):
        exception_class = kwargs.get('exception_class', UnmeaningfulReadError)
        for ref in refs:
@ -279,6 +304,13 @@ class Context(object):
    def encountered_gotos(self):
        return self._gotos_encountered

+    def set_terminated(self):
+        # Having a terminated context and having encountered gotos is not the same thing.
+        self._terminated = True
+
+    def has_terminated(self):
+        return self._terminated
+
    def assert_types_for_read_table(self, instr, src, dest, type_):
        if (not TableType.is_a_table_type(src.ref.type, type_)) or (not dest.type == type_):
            raise TypeMismatchError(instr, '{} and {}'.format(src.ref.name, dest.name))
@ -364,19 +396,20 @@ class Analyzer(object):

    def analyze_routine(self, routine):
        assert isinstance(routine, Routine)
-        self.current_routine = routine
        if routine.block is None:
            # it's an extern, that's fine
            return
+
+        self.current_routine = routine
        type_ = routine.location.type
        context = Context(self.routines, routine, type_.inputs, type_.outputs, type_.trashes)
+        self.exit_contexts = []

        if self.debug:
            print("at start of routine `{}`:".format(routine.name))
            print(context)

        self.analyze_block(routine.block, context)
-        trashed = set(context.each_touched()) - set(context.each_meaningful())

        if self.debug:
            print("at end of routine `{}`:".format(routine.name))
@ -390,17 +423,40 @@ class Analyzer(object):
            print('-' * 79)
            print('')

-        # even if we goto another routine, we can't trash an output.
+        if self.exit_contexts:
+            # check that they are all consistent
+            exit_context = self.exit_contexts[0]
+            exit_meaningful = set(exit_context.each_meaningful())
+            exit_touched = set(exit_context.each_touched())
+            exit_writeable = set(exit_context.each_writeable())
+            for ex in self.exit_contexts[1:]:
+                if set(ex.each_meaningful()) != exit_meaningful:
+                    raise InconsistentExitError("Exit contexts are not consistent")
+                if set(ex.each_touched()) != exit_touched:
+                    raise InconsistentExitError("Exit contexts are not consistent")
+                if set(ex.each_writeable()) != exit_writeable:
+                    raise InconsistentExitError("Exit contexts are not consistent")
+            context.update_from(exit_context)
+
+        trashed = set(context.each_touched()) - set(context.each_meaningful())
+
+        # these all apply whether we encountered goto(s) in this routine, or not...:
+
+        # can't trash an output.
        for ref in trashed:
            if ref in type_.outputs:
                raise UnmeaningfulOutputError(routine, ref.name)

-        if not context.encountered_gotos():
-            for ref in type_.outputs:
-                context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError)
-            for ref in context.each_touched():
-                if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref):
-                    raise ForbiddenWriteError(routine, ref.name)
+        # all outputs are meaningful.
+        for ref in type_.outputs:
+            context.assert_meaningful(ref, exception_class=UnmeaningfulOutputError)
+
+        # if something was touched, then it should have been declared to be writable.
+        for ref in context.each_touched():
+            if ref not in type_.outputs and ref not in type_.trashes and not routine_has_static(routine, ref):
+                raise ForbiddenWriteError(routine, ref.name)
+
+        self.exit_contexts = None
        self.current_routine = None
        return context

@ -433,8 +489,8 @@ class Analyzer(object):
        dest = instr.dest
        src = instr.src

-        if context.encountered_gotos():
-            raise IllegalJumpError(instr, instr)
+        if context.has_terminated():
+            raise TerminatedContextError(instr, instr)

        if opcode == 'ld':
            if isinstance(src, IndexedRef):
@ -678,6 +734,40 @@ class Analyzer(object):
            self.assert_affected_within('trashes', type_, current_type)

            context.encounter_gotos(set([instr.location]))
+
+            # Now that we have encountered a goto, we update the
+            # context here to match what someone calling the goto'ed
+            # function directly, would expect.  (which makes sense
+            # when you think about it; if this goto's F, then calling
+            # this is like calling F, from the perspective of what is
+            # returned.)
+            #
+            # However, this isn't the current context anymore.  This
+            # is an exit context of this routine.
+
+            exit_context = context.clone()
+
+            for ref in type_.outputs:
+                exit_context.set_touched(ref)   # ?
+                exit_context.set_written(ref)
+
+            for ref in type_.trashes:
+                exit_context.assert_writeable(ref)
+                exit_context.set_touched(ref)
+                exit_context.set_unmeaningful(ref)
+
+            self.exit_contexts.append(exit_context)
+
+            # When we get to the end, we'll check that all the
+            # exit contexts are consistent with each other.
+
+            # We set the current context as having terminated.
+            # If we are in a branch, the merge will deal with
+            # having terminated.  If we are at the end of the
+            # routine, the routine end will deal with that.
+
+            context.set_terminated()
+
        elif opcode == 'trash':
            context.set_touched(instr.dest)
            context.set_unmeaningful(instr.dest)
@ -716,10 +806,23 @@ class Analyzer(object):
                message='initialized in block 2 but not in block 1 of `if {}`'.format(instr.src)
            )

-        # merge the contexts.  this used to be a method called `set_from`
-        context._touched = set(context1._touched) | set(context2._touched)
-        context.set_meaningful(*list(outgoing_meaningful))
-        context._writeable = set(context1._writeable) | set(context2._writeable)
+        # merge the contexts.
+
+        # first, the easy case: if one of the contexts has terminated, just use the other one.
+        # if both have terminated, we return a terminated context, and that's OK.
+
+        if context1.has_terminated():
+            context.update_from(context2)
+        elif context2.has_terminated():
+            context.update_from(context1)
+        else:
+            # the more complicated case: merge the contents of the contexts.
+            context._touched = set(context1._touched) | set(context2._touched)
+            context.set_meaningful(*list(outgoing_meaningful))
+            context._writeable = set(context1._writeable) | set(context2._writeable)
+
+        # in both cases, we need to merge the encountered gotos, in order that
+        # fallthru optimization continues to work correctly.
        context.encounter_gotos(context1.encountered_gotos() | context2.encountered_gotos())

        for ref in outgoing_trashes:
@ -733,6 +836,9 @@ class Analyzer(object):
        if instr.src is not None:  # None indicates 'repeat forever'
            context.assert_meaningful(instr.src)

+        if context.encountered_gotos():
+            raise IllegalJumpError(instr, instr)
+
        # now analyze it having been executed a second time, with the context
        # of it having already been executed.
        self.analyze_block(instr.block, context)
--- a/src/sixtypical/parser.py
+++ b/src/sixtypical/parser.py
@ -380,6 +380,8 @@ class Parser(object):
        self.scanner.expect('{')
        while not self.scanner.on('}'):
            instrs.append(self.instr())
+            if isinstance(instrs[-1], SingleOp) and instrs[-1].opcode == 'goto':
+                break
        self.scanner.expect('}')
        return Block(self.scanner.line_number, instrs=instrs)

--- a/tests/SixtyPical
+++ b/tests/SixtyPical
@ -2326,7 +2326,7 @@ But only if they are bytes.
    | }
    ? TypeMismatchError

-A `goto` cannot appear within a `save` block, even if it is otherwise in tail position.
+A `goto` cannot appear within a `save` block.

    | define other routine
    |   trashes a, z, n
@ -2371,8 +2371,7 @@ A `goto` cannot appear within a `save` block, even if it is otherwise in tail po
    | }
    = ok

-A `goto` cannot appear within a `with interrupts` block, even if it is
-otherwise in tail position.
+A `goto` cannot appear within a `with interrupts` block.

    | vector routine
    |   inputs x
@ -3019,87 +3018,7 @@ Calling the vector does indeed trash the things the vector says it does.
    | }
    ? UnmeaningfulOutputError: x

-`goto`, if present, must be in tail position (the final instruction in a routine.)
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     ld x, 0
-    |     goto bar
-    | }
-    = ok
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     goto bar
-    |     ld x, 0
-    | }
-    ? IllegalJumpError
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     ld x, 0
-    |     if z {
-    |         ld x, 1
-    |         goto bar
-    |     }
-    | }
-    = ok
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     ld x, 0
-    |     if z {
-    |         ld x, 1
-    |         goto bar
-    |     }
-    |     ld x, 0
-    | }
-    ? IllegalJumpError
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     ld x, 0
-    |     if z {
-    |         ld x, 1
-    |         goto bar
-    |     } else {
-    |         ld x, 0
-    |         goto bar
-    |     }
-    | }
-    = ok
-
-    | define bar routine trashes x, z, n {
-    |     ld x, 200
-    | }
-    | 
-    | define main routine trashes x, z, n {
-    |     ld x, 0
-    |     if z {
-    |         ld x, 1
-    |         goto bar
-    |     } else {
-    |         ld x, 0
-    |     }
-    | }
-    = ok
-
-For the purposes of `goto`, the end of a loop is never tail position.
+For now at least, you cannot have a `goto` inside a `repeat` loop.

    | define bar routine trashes x, z, n {
    |     ld x, 200
@ -3114,6 +3033,341 @@ For the purposes of `goto`, the end of a loop is never tail position.
    | }
    ? IllegalJumpError

+`goto`, as a matter of syntax, can only appear at the end
+of a block; but it need not be the final instruction in a
+routine.
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     goto bar
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     }
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     }
+    |     goto bar
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     }
+    |     ld x, 0
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |         goto bar
+    |     }
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |     }
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |     }
+    |     ld x, 0
+    | }
+    = ok
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |     }
+    |     goto bar
+    | }
+    = ok
+
+Even though `goto` can only appear at the end of a block,
+you can still wind up with dead code; the analysis detects
+this.
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |         goto bar
+    |     }
+    |     ld x, 100
+    | }
+    ? TerminatedContextError
+
+It is important that the type context at every
+`goto` is compatible with the type context at the end of
+the routine.
+
+    | define bar routine
+    |   inputs x
+    |   trashes x, z, n
+    | {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 0
+    |     }
+    |     ld x, 1
+    | }
+    = ok
+
+Here, we try to trash `x` before `goto`ing a routine that inputs `x`.
+
+    | define bar routine
+    |   inputs x
+    |   trashes x, z, n
+    | {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         trash x
+    |         goto bar
+    |     } else {
+    |         trash x
+    |     }
+    |     ld a, 1
+    | }
+    ? UnmeaningfulReadError: x
+
+Here, we declare that main outputs `a`, but we `goto` a routine that does not output `a`.
+
+    | define bar routine
+    |   inputs x
+    |   trashes x, z, n
+    | {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 2
+    |     }
+    |     ld a, 1
+    | }
+    ? UnmeaningfulOutputError: a
+
+Here, we declare that main outputs `a`, and we `goto` a routine that outputs `a`, so that's OK.
+
+    | define bar routine
+    |   inputs x
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 200
+    |     ld a, 1
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar
+    |     } else {
+    |         ld x, 2
+    |     }
+    |     ld a, 1
+    | }
+    = ok
+
+Here, we declare that main outputs `a`, and we `goto` two routines, and they both output `a`.
+
+    | define bar0 routine
+    |   inputs x
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld a, x
+    | }
+    | 
+    | define bar1 routine
+    |   inputs x
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld a, 200
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar0
+    |     } else {
+    |         ld x, 2
+    |         goto bar1
+    |     }
+    | }
+    = ok
+
+This is like the case just above, but one routine doesn't output `a`.
+
+    | define bar0 routine
+    |   inputs x
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld a, x
+    | }
+    | 
+    | define bar1 routine
+    |   inputs x
+    |   trashes x, z, n
+    | {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar0
+    |     } else {
+    |         ld x, 2
+    |         goto bar1
+    |     }
+    | }
+    ? InconsistentExitError
+
+This is like the above, but the two routines have different inputs, and that's OK.
+
+    | define bar0 routine
+    |   inputs x
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld a, x
+    | }
+    | 
+    | define bar1 routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld a, 200
+    | }
+    | 
+    | define main routine
+    |   outputs a
+    |   trashes x, z, n
+    | {
+    |     ld x, 0
+    |     if z {
+    |         ld x, 1
+    |         goto bar0
+    |     } else {
+    |         ld x, 2
+    |         goto bar1
+    |     }
+    | }
+    = ok
+
+TODO: we should have a lot more test cases for the above, here.
+
 Can't `goto` a routine that outputs or trashes more than the current routine.

    | define bar routine trashes x, y, z, n {
--- a/tests/SixtyPical
+++ b/tests/SixtyPical
@ -551,6 +551,9 @@ goto.
    | }
    = ok

+The label doesn't have to be defined yet at the point
+in the program text where it is `goto`d.
+
    | define main routine {
    |     goto foo
    | }
@ -559,6 +562,8 @@ goto.
    | }
    = ok

+Syntactically, you can `goto` a vector.
+
    | vector routine foo
    | 
    | define main routine {
@ -566,11 +571,25 @@ goto.
    | }
    = ok

+But you can't `goto` a label that never gets defined.
+
    | define main routine {
    |     goto foo
    | }
    ? SyntaxError

+`goto` may only be the final instruction in a block.
+
+    | define bar routine trashes x, z, n {
+    |     ld x, 200
+    | }
+    | 
+    | define main routine trashes x, z, n {
+    |     goto bar
+    |     ld x, 0
+    | }
+    ? Expected '}', but found 'ld'
+
 Buffers and pointers.

    | buffer[2048] buf