diff --git a/HISTORY.md b/HISTORY.md index 3af0224..3f6ebdb 100644 --- a/HISTORY.md +++ b/HISTORY.md @@ -10,9 +10,13 @@ History of SixtyPical * Accessing zero-page with `ld` and `st` generates zero-page opcodes. * A `byte` or `word` table can be initialized with a list of constants. * Branching and repeating on the `n` flag is now supported. -* The `--optimize-fallthru` option causes the program to be analyzed - for fallthru optimizations; `--dump-fallthru-info` option outputs the - information from this analysis phase, in JSON format, to stdout. +* The `--optimize-fallthru` option causes the routines of the program + to be re-ordered to maximize the number of cases where a `goto`'ed + routine can be simply "fallen through" to instead of `JMP`ed to. +* The `--dump-fallthru-info` option outputs the information from the + fallthru analysis phase, in JSON format, to stdout. +* Even without fallthru optimization, `RTS` is no longer emitted after + the `JMP` from compiling a final `goto`. * Specifying multiple SixtyPical source files will produce a single compiled result from their combination. * Rudimentary support for Atari 2600 prelude in a 4K cartridge image, diff --git a/README.md b/README.md index 1ab78a9..1c25b7a 100644 --- a/README.md +++ b/README.md @@ -63,6 +63,7 @@ Documentation * [Literate test suite for SixtyPical syntax](tests/SixtyPical%20Syntax.md) * [Literate test suite for SixtyPical analysis](tests/SixtyPical%20Analysis.md) * [Literate test suite for SixtyPical compilation](tests/SixtyPical%20Compilation.md) +* [Literate test suite for SixtyPical fallthru optimization](tests/SixtyPical%20Fallthru.md) * [6502 Opcodes used/not used in SixtyPical](doc/6502%20Opcodes.md) TODO @@ -73,11 +74,6 @@ TODO This preserves them, so that, semantically, they can be used later even though they are trashed inside the block. -### Re-order routines and optimize tail-calls to fallthroughs - -Not because it saves 3 bytes, but because it's a neat trick. 
Doing it optimally -is probably NP-complete. But doing it adequately is probably not that hard. - ### And at some point... * `low` and `high` address operators - to turn `word` type into `byte`. diff --git a/src/sixtypical/compiler.py b/src/sixtypical/compiler.py index b050af4..de01fba 100644 --- a/src/sixtypical/compiler.py +++ b/src/sixtypical/compiler.py @@ -107,7 +107,7 @@ class Compiler(object): for roster_row in compilation_roster: for routine_name in roster_row[0:-1]: - self.compile_routine(self.routines[routine_name]) + self.compile_routine(self.routines[routine_name], skip_final_goto=True) routine_name = roster_row[-1] self.compile_routine(self.routines[routine_name]) @@ -140,14 +140,18 @@ class Compiler(object): if defn.initial is None and defn.addr is None: self.emitter.resolve_bss_label(label) - def compile_routine(self, routine): + def compile_routine(self, routine, skip_final_goto=False): self.current_routine = routine + self.skip_final_goto = skip_final_goto + self.final_goto_seen = False assert isinstance(routine, Routine) if routine.block: self.emitter.resolve_label(self.get_label(routine.name)) self.compile_block(routine.block) - self.emitter.emit(RTS()) + if not self.final_goto_seen: + self.emitter.emit(RTS()) self.current_routine = None + self.skip_final_goto = False def compile_block(self, block): assert isinstance(block, Block) @@ -353,14 +357,18 @@ class Compiler(object): else: raise NotImplementedError elif opcode == 'goto': - location = instr.location - label = self.get_label(instr.location.name) - if isinstance(location.type, RoutineType): - self.emitter.emit(JMP(Absolute(label))) - elif isinstance(location.type, VectorType): - self.emitter.emit(JMP(Indirect(label))) + self.final_goto_seen = True + if self.skip_final_goto: + pass else: - raise NotImplementedError + location = instr.location + label = self.get_label(instr.location.name) + if isinstance(location.type, RoutineType): + self.emitter.emit(JMP(Absolute(label))) + elif 
isinstance(location.type, VectorType): + self.emitter.emit(JMP(Indirect(label))) + else: + raise NotImplementedError elif opcode == 'copy': self.compile_copy(instr, instr.src, instr.dest) elif opcode == 'trash': diff --git a/tests/SixtyPical Compilation.md b/tests/SixtyPical Compilation.md index 19c7250..d6562d0 100644 --- a/tests/SixtyPical Compilation.md +++ b/tests/SixtyPical Compilation.md @@ -776,7 +776,7 @@ Indirect call. = $081E JMP ($0822) = $0821 RTS -goto. +Compiling `goto`. Note that no `RTS` is emitted after the `JMP`. | routine bar | inputs y @@ -794,10 +794,9 @@ goto. | goto bar | } = $080D LDY #$C8 - = $080F JMP $0813 - = $0812 RTS - = $0813 LDX #$C8 - = $0815 RTS + = $080F JMP $0812 + = $0812 LDX #$C8 + = $0814 RTS ### Vector tables diff --git a/tests/SixtyPical Fallthru.md b/tests/SixtyPical Fallthru.md index 9ae5a7f..7f50462 100644 --- a/tests/SixtyPical Fallthru.md +++ b/tests/SixtyPical Fallthru.md @@ -25,7 +25,7 @@ or it does not potentially fall through to any routine. More formally, we can say -fall : R → R ∪ {nil}, fall(r) ≠ r +> fall : R → R ∪ {nil}, fall(r) ≠ r where `nil` is an atom that represents no routine. @@ -247,10 +247,7 @@ Note this currently reflects the re-ordering, but does not remove the jmp/rts. | { | goto foo | } - = $080D JMP $0811 - = $0810 RTS - = $0811 LDA #$00 - = $0813 RTS - = $0814 LDA #$FF - = $0816 JMP $0811 - = $0819 RTS + = $080D LDA #$00 + = $080F RTS + = $0810 LDA #$FF + = $0812 JMP $080D