From f92056d6401ea4cdef83ec69c1726b47ffb690d9 Mon Sep 17 00:00:00 2001 From: Chris Pressey Date: Fri, 16 Oct 2015 09:30:24 +0100 Subject: [PATCH] REBOOT the entire language & implementation as version 0.2-PRE. --- .gitignore | 2 +- .hgignore | 2 +- README.markdown | 237 +---------- bin/sixtypical | 62 +++ build.sh | 23 -- clean.sh | 3 - doc/Analyzing.markdown | 372 ----------------- doc/Checking.markdown | 450 -------------------- doc/Emitting.markdown | 456 -------------------- doc/Instruction_Support.markdown | 320 --------------- doc/SixtyPical.md | 407 ++++++++++++++++++ eg/add-fail.60p | 6 + eg/add-pass.60p | 8 + eg/cinv.60p | 15 - eg/demo.60p | 110 ----- eg/example.60p | 14 + eg/game.60p | 365 ---------------- eg/hello-world.60p | 13 - eg/if.60p | 11 + eg/screen1.60p | 10 - eg/screen2.60p | 7 - eg/screen3.60p | 14 - lib/basic_header.oph | 9 - loadngo.sh | 8 - src/Main.hs | 48 --- src/SixtyPical/Analyzer.hs | 176 -------- src/SixtyPical/Checker.hs | 86 ---- src/SixtyPical/Context.hs | 93 ----- src/SixtyPical/Emitter.hs | 281 ------------- src/SixtyPical/Model.hs | 204 --------- src/SixtyPical/Parser.hs | 685 ------------------------------- src/SixtyPical/Transformer.hs | 279 ------------- src/sixtypical/__init__.py | 0 src/sixtypical/analyzer.py | 159 +++++++ src/sixtypical/ast.py | 37 ++ src/sixtypical/evaluator.py | 146 +++++++ src/sixtypical/objects.py | 31 ++ src/sixtypical/parser.py | 217 ++++++++++ test.sh | 8 +- tests/SixtyPical Analysis.md | 415 +++++++++++++++++++ tests/SixtyPical Execution.md | 371 +++++++++++++++++ 41 files changed, 1907 insertions(+), 4253 deletions(-) create mode 100755 bin/sixtypical delete mode 100755 build.sh delete mode 100755 clean.sh delete mode 100644 doc/Analyzing.markdown delete mode 100644 doc/Checking.markdown delete mode 100644 doc/Emitting.markdown delete mode 100644 doc/Instruction_Support.markdown create mode 100644 doc/SixtyPical.md create mode 100644 eg/add-fail.60p create mode 100644 eg/add-pass.60p delete mode 100644 eg/cinv.60p delete mode 100644 eg/demo.60p create mode 100644 eg/example.60p delete mode 100644 eg/game.60p delete mode 100644 eg/hello-world.60p create mode 100644 eg/if.60p delete mode 100644 eg/screen1.60p delete mode 100644 eg/screen2.60p delete mode 100644 eg/screen3.60p delete mode 100644 lib/basic_header.oph delete mode 100755 loadngo.sh delete mode 100644 src/Main.hs delete mode 100644 src/SixtyPical/Analyzer.hs delete mode 100644 src/SixtyPical/Checker.hs delete mode 100644 src/SixtyPical/Context.hs delete mode 100644 src/SixtyPical/Emitter.hs delete mode 100644 src/SixtyPical/Model.hs delete mode 100644 src/SixtyPical/Parser.hs delete mode 100644 src/SixtyPical/Transformer.hs create mode 100644 src/sixtypical/__init__.py create mode 100644 src/sixtypical/analyzer.py create mode 100644 src/sixtypical/ast.py create mode 100644 src/sixtypical/evaluator.py create mode 100644 src/sixtypical/objects.py create mode 100644 src/sixtypical/parser.py create mode 100644 tests/SixtyPical Analysis.md create mode 100644 tests/SixtyPical Execution.md diff --git a/.gitignore b/.gitignore index 5c6f17f..4790153 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,3 @@ *.o *.hi -bin/* +*.pyc diff --git a/.hgignore b/.hgignore index 7442683..dd75852 100644 --- a/.hgignore +++ b/.hgignore @@ -2,4 +2,4 @@ syntax: glob *.o *.hi -bin/* +*.pyc diff --git a/README.markdown b/README.markdown index 3cd5242..1f0295c 100644 --- a/README.markdown +++ b/README.markdown @@ -2,227 +2,26 @@ SixtyPical ========== SixtyPical is a very low-level programming language, similar to 6502 assembly, -with static analysis through type-checking and abstract interpretation. +with static analysis through abstract interpretation. + +In practice, this means it catches things like + +* you forgot to clear carry before adding something to the accumulator +* a subroutine that you call trashes a register you thought was preserved + +and suchlike. It is a **work in progress**, currently at the **proof-of-concept** stage. -It is expected that a common use case for SixtyPical would be retroprogramming -for the Commodore 64 and other 6502-based computers such as the VIC-20, the -Apple ][+, and the NES. +The current version is 0.2-PRE. It is a complete reboot of SixtyPical 0.1. +The reference implementation is written in Python instead of Haskell. +The language is much simpler — we're going to try to get the analysis +completely right before adding more sophisticated and useful features. -Many SixtyPical instructions map precisely to 6502 opcodes. However, SixtyPical -is not an assembly language: the programmer does not have total control over -the layout of code and data in memory. Some 6502 opcodes have no SixtyPical -equivalent, while some have an equivalent that acts in a slightly different -(but intuitively related) way. And some commands are unique to SixtyPical. +Documentation: -`sixtypical` is the reference implementation of SixtyPical. It is written in -Haskell. It can currently parse and check a SixtyPical program, and can -emit an Ophis assembler listing for it. - -SixtyPical itself is distributed under a BSD-style open-source license, while -the example SixtyPical programs in the `eg` directory are in the public domain. -See the file `LICENSE` for more information. - -Quick Start ------------ - -If you have `ghc`, Ophis, and VICE 2.4 installed, clone this repo, `cd` into it, -and run - - ./loadngo.sh eg/game.60p - -The Big Idea(s) ---------------- - -### Typed Addresses ### - -SixtyPical distinguishes several kinds of addresses: those that hold a byte, -those that hold a word (in low-byte-high-byte sequence), those that are the -beginning of a table of bytes, and vectors (those that hold a word pointer to a -machine-language routine.) It prevents the program from accessing them in -certain ways. For example, these are illegal: - - reserve byte lives - reserve word score - routine do_it { - lda score // no! can't treat word as if it were a byte - lda lives, x // no! can't treat a byte as if it were a table - } - -### Abstract Interpretation ### - -SixtyPical tries to prevent the program from using data that has no meaning. - -The instructions of a routine are analyzed using abstract interpretation. -One thing we specifically do is determine which registers and memory locations -are *not* affected by the routine. For example, the following: - - routine do_it { - lda #0 - jsr update_score - sta vic_border_colour // uh... what do we know about reg A here? - } - -...is illegal *unless* one of the following is true: - -* the A register is declared to be a meaningful output of `update_score` -* `update_score` was analyzed and determined to not change the value of the - A register - -The first case must be done with an explicit declaration on `update_score`. -The second case will be be inferred using abstract interpretation of the code -of `update_score`. - -### Structured Programming ### - -SixtyPical eschews labels for code and instead organizes code into _blocks_. - -Instead of the assembly-language subroutine, SixtyPical provides the _routine_ -as the abstraction for a reusable sequence of code. A routine may be called, -or may be included inline, by another routine. The body of a routine is a -block. - -Along with routines, you get `if`, `repeat`, and `with` constructs which take -blocks. The `with` construct takes an instruction like `sei` and implicitly -(and unavoidably) inserts the corresponding `cli` at the end of the block. - -Abstract interpretation extends to `if` blocks. The two incoming contexts are -merged, and any storage locations poisoned in either context are considered -poisoned in the result context. (A similar case applies to `repeat` and -`with`, but these are different too as there is only one block and it is always -executed at least once.) - -Declarations can have block scope. Such declarations may only be used within -the block in which they are declared. `reserve`d storage inside a block is not, -however, like a local variable (or `auto` in C); rather, it is more like a -`static` in C, except the value at that address is not guaranteed to be -retained between invokations of the block. This is intended to be used for -temporary storage. In addition, if analysis of the call graph indicates that -two such temporary addresses are never used simultaneously, they may be merged -to the same address. (This is, however, not yet implemented, and may not be -implemented for a while.) - -### Pseudo-Instructions ### - -Along with instructions which map to the 6502 instruction set, SixtyPical -supplies some instructions which are slightly more abstract and powerful. -For lack of a better term, I'm calling them "pseudo-instructions" here. -(But I would really like a better term.) - -In a macro assembler, these pseudo-instructions would be implemented with -macros. However, macros, being textual-substitution-based, are a pain to -analyze. By providing the functions as built-in instructions, we can -easily work them into the type system. Also, there are some macros that are -so common and useful that it makes sense for them to be built-ins, with -standardized, prescriptive names. - -Such pseudo-instructions are: - -* `copy`, which copies a value from one storage location to another. - This is a typesafe way to copy 16-bit `word`s and `vector`s. - In the future, it may handle 8-bit values and immediate values too. -* `save`, which is not yet implemented. Intended to be used in `with` - blocks when you want to save a value but you don't want to use the - stack. Pairs well with block-level temporary `reserve`d addresses. - -### "It's a Partial Solution" ### - -SixtyPical does not attempt to force your typed, abstractly interpreted -program to be absolutely watertight. In assembly language on an 8-bit -microprocessor, you will sometimes _need_ to do dangerous and tricky things, -like self-modifying code and cycle-counting, in order to accomplish a -sophisticated effect, like a raster interrupt trick. - -For that reason, `sixtypical` does not attempt to emit a fully-formed -Ophis assembler source. Instead, it expects you to mix its output with -some raw Ophis assembler to make a complete program. This "mixin" may contain -as much unchecked assembler code as you like. An example is provided in the -`lib` directory which adds a prelude that makes the resulting program -runnable from Commodore BASIC 2.0 and stores uninitialized data at `$C000`. - -In addition, various checks are not attempted (such as tracking the usage -of an indirect indexed table) and other checks may be subverted (for example -by `assign`ing two variables with two different types of storage at the same -address.) - -In summary, SixtyPical helps you write a very-nearly-assembly-level program -which is a bit more "solid" than raw assembly, but it still expects you to -know what you're doing down there. - -For More Information --------------------- - -For more information, see the docs (which are written in the form of -[Falderal](http://catseye.tc/node/Falderal) literate test suites. If you -have `falderal` on your executable search path, you can run the tests with -`./test.sh`.) - -* [Checking](https://github.com/catseye/SixtyPical/blob/master/doc/Checking.markdown) -* [Analyzing](https://github.com/catseye/SixtyPical/blob/master/doc/Analyzing.markdown) -* [Emitting](https://github.com/catseye/SixtyPical/blob/master/doc/Emitting.markdown) -* [Instruction Support](https://github.com/catseye/SixtyPical/blob/master/doc/Instruction_Support.markdown) - -Internals ---------- - -Some (OK, a lot) of the Haskell code is kind of gross and non-idiomatic. -The parser, in particular, could not be described as "elegant". There -could definitely be more higher-order functions defined and used. At the -same time, I'm really not a fan of pointless style — I prefer it when things -are written out explicitly and pedantically. Still, there are places where -an added `foldr` or two would not be unwelcome... - -The 6502 semantics, which are arguably RISC-like (load/store architecture) -are translated into an intermediate representation which is arguably CISC-like. -For example, `lda`, `sta`, `ldx`, and `tax` all become kinds of `COPY` -internally. This internal instruction set is much smaller than the 6502's, -and thus is usually easier to analyze. It would also be easier to adapt to -other instruction sets, such as the Z80 or the 8086. - -Notes ------ - -This is not quite the right place for this, but I need to write it down -somewhere: - -6502 machine code supports an indirect `jmp`, but not an indirect `jsr`. -But an indirect `jsr` is very easy to simulate with an indirect `jmp`. -Instead of - - launch: - copy whatever to vector - jsr (vector) - ... - -Just say - - launch: - copy whatever to vector - jsr indirect_jsr - ... - - indirect_jsr: - jmp (vector) - -Then the `rts` at the end of your routine pointed to by `vector` will -return you to where you `jsr`ed. - -Because the above is so easy to write, SixtyPical will probably not support -a `jsr (vector)` form (unless it would somehow make analysis easier, but -it probably won't.) - -TODO ----- - -* Addressing modes — indexed mode on more instructions -* Rename and lift temporaries in nested blocks -* Tail-recursion optimization -* `word 100` to promote an otherwise 8-bit literal to a 16-bit value -* `jmp routine` -* Enforce that `jmp`s come at ends of blocks(?) -* `outputs` on externals -* Routine is a kind of StorageLocation? (Location)? -* Test that `pha` restores the A register -* Test poisonining of flags -* Test output of flags +* [doc/SixtyPical.md](SixtyPical.md) — the spec +* [tests/SixtyPical Execution.md](SixtyPical Execution.md) — + literate test suite for running SixtyPical programs +* [tests/SixtyPical Analysis.md](SixtyPical Analysis.md) — + literate test suite for statically analyzing SixtyPical programs diff --git a/bin/sixtypical b/bin/sixtypical new file mode 100755 index 0000000..ff6d85f --- /dev/null +++ b/bin/sixtypical @@ -0,0 +1,62 @@ +#!/usr/bin/env python + +"""Usage: sixtypical [OPTIONS] FILES + +Analyzes and/or executes and/or compiles a Sixtypical program. +""" + +from os.path import realpath, dirname, join +import sys + +sys.path.insert(0, join(dirname(realpath(sys.argv[0])), '..', 'src')) + +# ----------------------------------------------------------------- # + +import codecs +from optparse import OptionParser +import sys +import traceback + +from sixtypical.parser import Parser +from sixtypical.evaluator import eval_program +from sixtypical.analyzer import analyze_program + + +if __name__ == '__main__': + optparser = OptionParser(__doc__.strip()) + + optparser.add_option("--analyze", + action="store_true", dest="analyze", default=False, + help="") + optparser.add_option("--compile", + action="store_true", dest="compile", default=False, + help="") + optparser.add_option("--traceback", + action="store_true", dest="traceback", default=False, + help="") + optparser.add_option("--execute", + action="store_true", dest="execute", default=False, + help="") + + (options, args) = optparser.parse_args(sys.argv[1:]) + + for filename in args: + text = open(filename).read() + p = Parser(text) + program = p.program() + + if options.analyze: + try: + analyze_program(program) + except Exception as e: + if options.traceback: + raise + else: + traceback.print_exception(e.__class__, e, None) + sys.exit(1) + print 'ok' + + if options.execute: + context = eval_program(program) + for key, value in sorted(context.iteritems()): + print "%s: %s" % (key, value) diff --git a/build.sh b/build.sh deleted file mode 100755 index 6334cbe..0000000 --- a/build.sh +++ /dev/null @@ -1,23 +0,0 @@ -#!/bin/sh - -PROG=sixtypical - -if [ x`which ghc` = x -a x`which runhugs` = x ]; then - echo "Neither ghc nor runhugs found on search path." - exit 1 -fi - -mkdir -p bin - -if [ x`which ghc` = x -o ! x$USE_HUGS = x ]; then - # create script to run with Hugs - cat >bin/$PROG <<'EOF' -#!/bin/sh -THIS=`realpath $0` -DIR=`dirname $THIS`/../src -runhugs $DIR/Main.hs $* -EOF - chmod 755 bin/$PROG -else - cd src && ghc --make Main.hs -o ../bin/$PROG -fi diff --git a/clean.sh b/clean.sh deleted file mode 100755 index c45940a..0000000 --- a/clean.sh +++ /dev/null @@ -1,3 +0,0 @@ -#!/bin/sh - -rm -f src/*.hi src/*.o src/*/*.hi src/*/*.o diff --git a/doc/Analyzing.markdown b/doc/Analyzing.markdown deleted file mode 100644 index 814833c..0000000 --- a/doc/Analyzing.markdown +++ /dev/null @@ -1,372 +0,0 @@ -Analyzing SixtyPical Programs -============================= - - -> Tests for functionality "Analyze SixtyPical program" - - -> Functionality "Analyze SixtyPical program" is implemented by - -> shell command "bin/sixtypical analyze %(test-body-file)" - -Analysis determines what storage locations have been modified by a -routine. - - | reserve byte score - | routine main { - | lda #4 - | sta score - | } - = main ([]) - = A: UpdatedWith (Immediate 4) - = NamedLocation Nothing "score": UpdatedWith A - -A routine cannot expect registers which a called routine does not -preserve, to be preserved. We say the called routine "poisons" those -registers. - - | assign byte border_colour 4000 - | reserve byte score - | routine update_score - | { - | lda #8 - | sta score - | } - | routine main { - | lda #4 - | jsr update_score - | sta border_colour - | } - ? routine 'main' does not preserve 'A' - -But if a called routine does preserve those registers, the caller can -continue to use them after calling the routine. - - | assign byte border_colour 4000 - | reserve byte score - | routine update_score - | { - | ldx score - | inx - | stx score - | } - | routine main { - | lda #4 - | jsr update_score - | sta border_colour - | } - = main ([]) - = A: UpdatedWith (Immediate 4) - = X: PoisonedWith (Immediate 1) - = NamedLocation Nothing "border_colour": UpdatedWith A - = NamedLocation Nothing "score": PoisonedWith X - = - = update_score ([]) - = X: UpdatedWith (Immediate 1) - = NamedLocation Nothing "score": UpdatedWith X - -Not only registers, but also named variables, can be poisoned by a called -routine. - - | reserve byte score - | routine update_score - | { - | lda #8 - | sta score - | } - | routine main { - | jsr update_score - | lda score - | } - ? routine 'main' does not preserve 'NamedLocation Nothing "score"' - -Of course, the difference between poisoning and intentionally modifying a -storage location is a matter of intent. The solution to the above is to -explicitly notate `update_score` as an "output" of the routine. - - | assign byte border_colour 4000 - | reserve byte score - | routine update_score outputs (score) - | { - | lda #8 - | sta score - | } - | routine main { - | ldx score - | jsr update_score - | ldx score - | } - = main ([]) - = A: PoisonedWith (Immediate 8) - = X: UpdatedWith (NamedLocation Nothing "score") - = NamedLocation Nothing "score": UpdatedWith A - = - = update_score ([NamedLocation Nothing "score"]) - = A: UpdatedWith (Immediate 8) - = NamedLocation Nothing "score": UpdatedWith A - -Routines can name registers as outputs. - - | reserve byte score - | routine update_score - | { - | lda #8 - | } - | routine main { - | jsr update_score - | sta score - | } - ? routine 'main' does not preserve 'A' - - | reserve byte score - | routine update_score outputs (.a) - | { - | lda #8 - | } - | routine main { - | jsr update_score - | sta score - | } - = main ([]) - = A: UpdatedWith (Immediate 8) - = NamedLocation Nothing "score": UpdatedWith A - = - = update_score ([A]) - = A: UpdatedWith (Immediate 8) - -If a location is poisoned in either branch of an `if`, it is poisoned -after the `if`. Note there are several tests for this. - - | reserve byte score - | routine update_score - | { - | if beq { - | lda #8 - | } else { - | ldx #8 - | } - | } - | routine main { - | lda #4 - | jsr update_score - | sta score - | } - ? routine 'main' does not preserve 'A' - - | reserve byte score - | routine update_score - | { - | if beq { - | ldx #8 - | } else { - | lda #8 - | } - | } - | routine main { - | lda #4 - | jsr update_score - | sta score - | } - ? routine 'main' does not preserve 'A' - - | reserve byte score - | routine update_score - | { - | lda #4 - | sta score - | } - | routine main { - | lda #4 - | if beq { - | jsr update_score - | } else { - | ldx #3 - | } - | sta score - | } - ? routine 'main' does not preserve 'A' - - | reserve byte score - | routine update_score - | { - | lda #4 - | sta score - | } - | routine main { - | lda #4 - | if beq { - | ldx #3 - | } else { - | jsr update_score - | } - | sta score - | } - ? routine 'main' does not preserve 'A' - - | reserve byte score - | routine update_score - | { - | ldx #4 - | stx score - | } - | routine main { - | lda #4 - | if beq { - | jsr update_score - | } else { - | ldx #4 - | } - | sta score - | } - = main ([]) - = A: UpdatedWith (Immediate 4) - = X: PoisonedWith (Immediate 4) - = NamedLocation Nothing "score": UpdatedWith A - = - = update_score ([]) - = X: UpdatedWith (Immediate 4) - = NamedLocation Nothing "score": UpdatedWith X - - | assign word position $fb - | reserve byte value - | - | routine reset_position { - | lda #$00 - | sta position - | } - | - | routine main { - | inc value - | lda value - | ldy #0 - | sta (position), y - | if beq { - | jsr reset_position - | } else { - | } - | } - = main ([]) - = A: PoisonedWith (Immediate 4) - = Y: UpdatedWith (Immediate 0) - = IndirectIndexed (NamedLocation Nothing "position") Y: UpdatedWith A - = NamedLocation Nothing "position": PoisonedWith A - = NamedLocation Nothing "value": UpdatedWith (Immediate 1) - = - = reset_position ([]) - = A: UpdatedWith (Immediate 4) - = NamedLocation Nothing "position": UpdatedWith A - - | assign word position $fb - | reserve byte value - | - | routine reset_position { - | lda #$00 - | sta position - | } - | - | routine main { - | inc value - | lda value - | ldy #0 - | sta (position), y - | if beq { - | jsr reset_position - | } else { - | } - | sta value - | } - ? routine 'main' does not preserve 'A' - - | assign word position $fb - | reserve byte value - | - | routine reset_position { - | lda #$00 - | sta position - | } - | - | routine main { - | inc value - | lda value - | ldy #0 - | sta (position), y - | jsr reset_position - | if beq { - | } else { - | sta value - | } - | } - ? routine 'main' does not preserve 'A' - -A storage location poisoned in a `repeat` continues to be poisoned -after the `repeat`. - - | reserve byte value - | - | routine blah { - | lda #123 - | } - | routine main { - | lda #33 - | ldy #255 - | repeat bne { - | jsr blah - | dey - | } - | sta value - | } - ? routine 'main' does not preserve 'A' - -Oh, here's a tricky one. The accumulator isn't poisoned on the first run -through the `repeat`, but it **is** on the second run through. We handle -this simply by abstractly interpreting the `repeat`'s block twice — the -second time in the context of having already interpreted it once. - - | reserve byte value - | - | routine blah { - | lda #123 - | } - | routine main { - | lda #33 - | ldy #255 - | repeat bne { - | sta value - | jsr blah - | dey - | } - | } - ? routine 'main' does not preserve 'A' - -Poisoning a high byte or low byte of a word poisons the whole word. - - | reserve word score - | reserve byte temp - | routine update_score - | { - | ldx #4 - | stx score - | sta temp - | } - ? routine 'main' does not preserve 'NamedLocation Nothing "score"' - - | reserve word score - | reserve byte temp - | routine update_score - | { - | ldx #4 - | stx >score - | } - | routine main { - | jsr update_score - | lda Tests for functionality "Parse SixtyPical program" - - -> Functionality "Parse SixtyPical program" is implemented by - -> shell command "bin/sixtypical parse %(test-body-file)" - - -> Tests for functionality "Check SixtyPical program" - - -> Functionality "Check SixtyPical program" is implemented by - -> shell command "bin/sixtypical check %(test-body-file)" - -Some Basic Syntax ------------------ - -`main` must be present. - - | routine main { - | nop - | } - = True - - | routine frog { - | nop - | } - ? missing 'main' routine - -Each instruction need not appear on its own line. (Although you probably -still want to write in that style, for consistency with assembly code.) - - | routine main { - | nop lda #1 ldx #1 nop - | } - = True - -Javascript-style block and line comments are both supported. -They may appear anywhere whitespace may appear. - - | reserve byte lives /* fnord */ - | assign byte gdcol 647 // fnord - | external blastoff 4 // fnnnnnnnnnnnnnnnnfffffffff - | - | routine /* hello */ main { - | /* this routine does everything you need. */ - | lda #1 // we assemble the fnord using - | ldx #1 // multiple lorem ipsums which - | ldy #1 - | lda #1 /* we - | found under the bridge by the old mill yesterday */ - | ldx #1 - | } - = True - -Addresses ---------- - -An address may be declared with `reserve`, which is like `.data` or `.bss` -in an assembler. This is an address into the program's data. It is global -to all routines. - - | reserve byte lives - | routine main { - | lda #3 - | sta lives - | } - | routine died { - | dec lives - | } - = True - -An address declared with `reserve` may be given an initial value. - - | reserve byte lives : 3 - | routine main { - | sta lives - | } - | routine died { - | dec lives - | } - = True - -A byte table declared with `reserve` may be given an initial value consisting -of a sequence of bytes. - - | reserve byte[4] table : (0 $40 $10 20) - | routine main { - | ldy #0 - | lda table, y - | } - | routine died { - | sta table, y - | } - = True - -A byte table declared with `reserve` may be given an initial value consisting -of a sequence of bytes represented as a character string. - - | reserve byte[4] table : "What" - | routine main { - | ldy #0 - | lda table, y - | } - | routine died { - | sta table, y - | } - = True - -When a byte table declared with `reserve` is given an initial value consisting -of a sequence of bytes, it must be the same length as the table is declared. - - | reserve byte[4] table : (0 $40 $10 20 60 70 90) - | routine main { - | ldy #0 - | lda table, y - | } - | routine died { - | sta table, y - | } - ? initial table incorrect size - - | reserve byte[4] table : "Hello, world!" - | routine main { - | ldy #0 - | lda table, y - | } - | routine died { - | sta table, y - | } - ? initial table incorrect size - -We can also define word and vector tables. These are each stored as two -byte tables, one table of low bytes and one table of high bytes. - - | reserve word[100] words - | reserve vector[100] vectors - | routine main { - | lda #$04 - | sta words - | // sta >words, y - | // copy routine main to vectors, y - | } - = True - -An address may be declared with `assign`, which is like `.alias` in an -assembler, with the understanding that the value will be treated "like an -address." This is generally an address into the operating system or hardware -(e.g. kernal routine, I/O port, etc.) - - | assign byte screen $0400 - | routine main { - | lda #0 - | sta screen - | } - = True - -The body of a routine may not refer to an address literally. It must use -a symbol that was declared previously with `reserve` or `assign`. - - | routine main { - | lda #0 - | sta $0400 - | } - ? unexpected - - | assign byte screen $0400 - | routine main { - | lda #0 - | sta screen - | } - = True - -Test for many combinations of `reserve` and `assign`. - - | reserve byte lives - | assign byte gdcol 647 - | reserve word score - | assign word memstr 641 - | reserve vector v - | assign vector cinv 788 - | reserve byte[16] frequencies - | assign byte[256] screen 1024 - | routine main { - | nop - | } - = True - -`reserve` may be block-level. - - | routine main { - | reserve byte lives - | lda lives - | } - = True - -Block-level declarations are only visible in the block in which they are -declared. - - | routine main { - | reserve byte lives - | lda #3 - | sta lives - | } - | routine died { - | dec lives - | } - ? undeclared location 'lives' - -A block-level `reserve` may not supply an initial value. - - | routine main { - | reserve byte lives : 3 - | lda lives - | } - ? block-level 'lives' cannot supply initial value - -A program may declare an `external`. - - | external blastoff 49152 - | routine main { - | jsr blastoff - | } - = True - -All declarations (`reserve`s and `assign`s) must come before any `routines`. - - | routine main { - | lda score - | } - | reserve word score - ? expecting "routine" - -All locations used in all routines must be declared first. - - | reserve byte score - | routine main { - | lda score - | cmp screen - | } - ? undeclared location - -Even in inner blocks. - - | reserve byte score - | assign byte screen 1024 - | routine main { - | lda score - | cmp screen - | if beq { - | lda score - | } else { - | lda fnord - | } - | } - ? undeclared location - -Block-level declarations are visible in inner blocks. - - | routine main { - | reserve byte lives - | with sei { - | if beq { - | lda #3 - | repeat bne { - | sta lives - | } - | } else { - | sta lives - | } - | } - | } - = True - -A block-level `reserve` may not supply an initial value. - - | routine main { - | reserve byte lives : 3 - | lda lives - | } - ? block-level 'lives' cannot supply initial value - -All routines jsr'ed to must be defined, or external. - - | routine main { - | jsr blastoff - | } - ? undeclared routine - -No duplicate location names in declarations. - - | reserve word score - | assign word score 4000 - | routine main { - | nop - | } - ? duplicate location name - -No duplicate routine names. - - | routine main { - | nop - | } - | routine main { - | txa - | } - ? duplicate routine name - -No duplicate routine names, including externals. - - | external main 7000 - | routine main { - | nop - | } - ? duplicate routine name - -We can jump indirectly through a vector. - - | reserve vector blah - | routine main { - | jmp (blah) - | } - = True - -We can't jump indirectly through a word. - - | reserve word blah - | routine main { - | jmp (blah) - | } - ? jmp to non-vector - -We can't jump indirectly through a byte. - - | assign byte screen 1024 - | routine main { - | jmp (screen) - | } - ? jmp to non-vector - -We can absolute-indexed a byte table. - - | assign byte[256] screen 1024 - | routine main { - | sta screen, x - | } - = True - -We cannot absolute-indexed a byte. - - | assign byte screen 1024 - | routine main { - | sta screen, x - | } - ? indexed access of non-table - -We cannot absolute-indexed a word. - - | assign word screen 1024 - | routine main { - | sta screen, x - | } - ? indexed access of non-table - -We cannot absolute access a word. - - | assign word screen 1024 - | routine main { - | ldx screen - | } - ? incompatible types 'Word' and 'Byte' - -No, not even with `ora`. - - | assign word screen 1024 - | routine main { - | ora screen - | } - ? incompatible types 'Byte' and 'Word' - -Instead, we have to do this. - - | assign word screen 1024 - | routine main { - | lda screen - | } - = True - -We cannot absolute access a vector. - - | assign vector screen 1024 - | routine main { - | lda screen - | } - ? incompatible types 'Vector' and 'Byte' - -### Addresses ### - -An address knows what kind of data is stored at the address: - -* `byte`: an 8-bit byte. not part of a word. not to be used as an address. - (could be an index though.) -* `word`: a 16-bit word. not to be used as an address. -* `vector`: a 16-bit address of a routine. Only a handful of operations - are supported on vectors: - - * copying the contents of one vector to another - * copying the address of a routine into a vector - * jumping indirectly to a vector (i.e. to the code at the address - contained in the vector (and this can only happen at the end of a - routine (NYI)) - * `jsr`'ing indirectly to a vector (which is done with a fun - generated trick (NYI)) - -* `byte [SIZE]`: a series of `SIZE` `byte`s contiguous in memory starting - from the address. This is the only kind of address that can be used in - indexed addressing. `SIZE` has a minimum of 1 and a maximum of 256. - -### Blocks ### - -Each routine is a block. It may be composed of inner blocks, if those -inner blocks are attached to certain instructions. - -SixtyPical does not have instructions that map literally to the 6502 branch -instructions. Instead, it has an `if` construct, with two blocks (for the -"then" and `else` parts), and the branch instructions map to conditions for -this construct. - -Similarly, there is a `repeat` construct. The same branch instructions can -be used in the condition to this construct. In this case, they branch back -to the top of the `repeat` loop. - -The abstract states of the machine at each of the different block exits are -merged during analysis. If any register or memory location is treated -inconsistently (e.g. updated in one branch of the test, but not the other,) -that register cannot subsequently be used without a declaration to the effect -that we know what's going on. (This is all a bit fuzzy right now.) - -There is also no `rts` instruction. It is included at the end of a routine, -but only when the routine is used as a subroutine. Also, if the routine -ends by `jsr`ing another routine, it reserves the right to do a tail-call -or even a fallthrough. - -There are also _with_ instructions, which are associated with three opcodes -that have natural symmetrical opcodes: `pha`, `php`, and `sei`. These -instructions take a block. The natural symmetrical opcode is inserted at -the end of the block. diff --git a/doc/Emitting.markdown b/doc/Emitting.markdown deleted file mode 100644 index 95824e9..0000000 --- a/doc/Emitting.markdown +++ /dev/null @@ -1,456 +0,0 @@ -Emitting Ophis from SixtyPical Programs -======================================= - - -> Tests for functionality "Emit ASM for SixtyPical program" - - -> Functionality "Emit ASM for SixtyPical program" is implemented by - -> shell command "bin/sixtypical emit %(test-body-file)" - -Emitting an `if`. - - | assign byte screen $0400 - | routine main { - | lda screen - | cmp screen - | if beq { - | tax - | } else { - | tay - | } - | sta screen - | } - = main: - = lda screen - = cmp screen - = BEQ _label_1 - = tay - = jmp _past_1 - = _label_1: - = tax - = _past_1: - = sta screen - = rts - = - = .data - = .alias screen 1024 - -Emitting a `repeat`. - - | assign byte screen 1024 - | reserve byte four : $04 - | routine main { - | ldy four - | repeat bne { - | inc screen - | dey - | cpy four - | } - | sty screen - | } - = main: - = ldy four - = - = _repeat_1: - = inc screen - = dey - = cpy four - = BNE _repeat_1 - = sty screen - = rts - = - = four: .byte 4 - = .data - = .alias screen 1024 - -Nested ifs. - - | routine main { - | if beq { - | if bcc { - | lda #0 - | } else { - | if bvs { - | lda #1 - | } else { - | lda #2 - | } - | } - | } else { - | lda #3 - | } - | } - = main: - = BEQ _label_3 - = lda #3 - = jmp _past_3 - = _label_3: - = BCC _label_2 - = BVS _label_1 - = lda #2 - = jmp _past_1 - = _label_1: - = lda #1 - = _past_1: - = jmp _past_2 - = _label_2: - = lda #0 - = _past_2: - = _past_3: - = rts - -Installing an interrupt handler (at the Kernal level, i.e. with CINV) - - | assign byte screen 1024 - | assign vector cinv 788 - | reserve vector save_cinv - | - | routine main { - | with sei { - | copy cinv save_cinv - | copy routine our_cinv to cinv - | } - | } - | - | routine our_cinv { - | inc screen - | jmp (save_cinv) - | } - = main: - = sei - = lda cinv - = sta save_cinv - = lda cinv+1 - = sta save_cinv+1 - = lda #our_cinv - = sta cinv+1 - = cli - = rts - = - = our_cinv: - = inc screen - = jmp (save_cinv) - = rts - = - = .data - = .alias screen 1024 - = .alias cinv 788 - = .space save_cinv 2 - -Copy command: immediate -> byte - - | reserve byte position - | routine main { - | copy #23 position - | } - = main: - = lda #23 - = sta position - = rts - = - = .data - = .space position 1 - -Copy command: immediate -> word - - | reserve word position - | routine main { - | copy #$0400 position - | } - = main: - = lda #0 - = sta position - = lda #4 - = sta position+1 - = rts - = - = .data - = .space position 2 - -Copy command: byte-sized immediate -> word - -Disabled for now. - - | reserve word position - | routine main { - | copy #1 position - | } - = main: - = lda #1 - = sta position - = lda #0 - = sta position+1 - = rts - = - = .data - = .space position 2 - -Copy command: word -> word - - | reserve word position1 - | reserve word position2 - | routine main { - | copy position1 position2 - | } - = main: - = lda position1 - = sta position2 - = lda position1+1 - = sta position2+1 - = rts - = - = .data - = .space position1 2 - = .space position2 2 - -Copy command: word -> word indexed - - | reserve word loc - | reserve word[4] locs - | routine main { - | ldy #0 - | copy loc locs, y - | } - = main: - = ldy #0 - = lda loc - = sta locs_lo, y - = lda loc+1 - = sta locs_hi, y - = rts - = - = .data - = .space loc 2 - = .space locs_lo 4 - = .space locs_hi 4 - -Copy command: word INDEXED -> word - - | reserve word loc - | reserve word[4] locs - | routine main { - | ldx #0 - | copy locs, x loc - | } - = main: - = ldx #0 - = lda locs_lo, x - = sta loc - = lda locs_hi, x - = sta loc+1 - = rts - = - = .data - = .space loc 2 - = .space locs_lo 4 - = .space locs_hi 4 - -Copy command: byte -> indexed word table -> error. - - | reserve byte bbb - | reserve word[4] locs - | routine main { - | ldx #0 - | copy bbb locs, x - | } - ? incompatible types 'Byte' and 'Table Word 4' - -Copy command: byte -> low byte of indexed word table - - | reserve byte bbb - | reserve word[4] locs - | routine main { - | ldx #0 - | copy bbb high byte of indexed word table - - | reserve byte bbb - | reserve word[4] locs - | routine main { - | ldx #0 - | copy bbb >locs, x - | } - = main: - = ldx #0 - = lda bbb - = sta locs_hi, x - = rts - = - = .data - = .space bbb 1 - = .space locs_lo 4 - = .space locs_hi 4 - -Copy command: low byte of indexed word table -> byte - - | reserve byte bbb - | reserve word[4] locs - | routine main { - | ldx #0 - | copy byte - - | reserve byte bbb - | reserve word[4] locs - | routine main { - | ldx #0 - | copy >locs, x bbb - | } - = main: - = ldx #0 - = lda locs_hi, x - = sta bbb - = rts - = - = .data - = .space bbb 1 - = .space locs_lo 4 - = .space locs_hi 4 - -`main` is always emitted first. - - | reserve word position - | routine foo { - | inx - | } - | routine main { - | jsr foo - | jsr foo - | } - = main: - = jsr foo - = jsr foo - = rts - = - = foo: - = inx - = rts - = - = .data - = .space position 2 - -Reserving and assigning byte tables. - - | reserve byte[16] frequencies - | assign byte[256] screen $0400 - | routine main { - | lda #0 - | ldy #0 - | sta frequencies, y - | sta screen, y - | } - = main: - = lda #0 - = ldy #0 - = sta frequencies, y - = sta screen, y - = rts - = - = .data - = .space frequencies 16 - = .alias screen 1024 - -Reserving things with initial values. - - | reserve byte lives : 3 - | reserve word screen : $0400 - | reserve byte[8] frequencies : (0 1 2 4 5 8 9 10) - | reserve byte[13] message : "Hello, world!" - | routine main { - | } - = main: - = rts - = - = lives: .byte 3 - = screen: .word 1024 - = frequencies: .byte 0, 1, 2, 4, 5, 8, 9, 10 - = message: .byte 72, 101, 108, 108, 111, 44, 32, 119, 111, 114, 108, 100, 33 - -Temporary storage, in the form of block-local declarations. Note that these -temporaries are not unioned yet, but they could be. - - | routine a { - | reserve byte foo - | reserve word bar - | lda foo - | sta >bar - | } - | routine b { - | reserve byte baz - | reserve word quuz - | lda baz - | sta Tests for functionality "Emit ASM for SixtyPical program" - -Big test for parsing and emitting instructions. - - | reserve word vword - | reserve byte vbyte - | assign byte[256] table 1024 - | routine main { - | lda #4 - | ldx #0 - | ldy #$FF - | lda vbyte - | lda table, x - | lda table, y - | lda (vword), y - | lda vword - | inc vbyte - | tax - | inx - | dex - | stx vbyte - | tay - | iny - | dey - | sty vbyte - | cmp vbyte - | cmp #30 - | cmp vword - | ldx vbyte - | cpx vbyte - | cpx #31 - | txa - | ldy vbyte - | cpy vbyte - | cpy #32 - | tya - | sta vbyte - | sta table, x - | sta table, y - | sta (vword), y - | sta vword - | dec vbyte - | clc - | cld - | clv - | sec - | sed - | adc #8 - | adc vbyte - | and #8 - | and vbyte - | sbc #8 - | sbc vbyte - | ora #8 - | ora vbyte - | } - = main: - = lda #4 - = ldx #0 - = ldy #255 - = lda vbyte - = lda table, x - = lda table, y - = lda (vword), y - = lda vword - = lda vword+1 - = inc vbyte - = tax - = inx - = dex - = stx vbyte - = tay - = iny - = dey - = sty vbyte - = cmp vbyte - = cmp #30 - = cmp vword - = cmp vword+1 - = ldx vbyte - = cpx vbyte - = cpx #31 - = txa - = ldy vbyte - = cpy vbyte - = cpy #32 - = tya - = sta vbyte - = sta table, x - = sta table, y - = sta (vword), y - = sta vword - = sta vword+1 - = dec vbyte - = clc - = cld - = clv - = sec - = sed - = adc #8 - = adc vbyte - = and #8 - = and vbyte - = sbc #8 - = sbc vbyte - = ora #8 - = ora vbyte - = rts - = - = .data - = .space vword 2 - = .space vbyte 1 - = .alias table 1024 - - | reserve word vword - | reserve byte vbyte - | assign byte[256] table 1024 - | routine main { - | asl .a - | asl vbyte - | lsr .a - | lsr vbyte - | rol .a - | rol vbyte - | ror .a - | ror vbyte - | bit vbyte - | eor #5 - | eor vbyte - | } - = main: - = asl - = asl vbyte - = lsr - = lsr vbyte - = rol - = rol vbyte - = ror - = ror vbyte - = bit vbyte - = eor #5 - = eor vbyte - = rts - = - = .data - = .space vword 2 - = .space vbyte 1 - = .alias table 1024 - - | routine main { - | with pha { - | with sei { - | with php { - | lda #0 - | } - | lda #1 - | } - | lda #2 - | } - | } - = main: - = pha - = sei - = php - = lda #0 - = plp - = lda #1 - = cli - = lda #2 - = pla - = rts diff --git a/doc/SixtyPical.md b/doc/SixtyPical.md new file mode 100644 index 0000000..e4ea499 --- /dev/null +++ b/doc/SixtyPical.md @@ -0,0 +1,407 @@ +Sixtypical +========== + +Sixtypical is a simplified version of [Sixtypical][]. + +This is a complete reboot of the previous design and implementation, which +was semantically a mess due to the way it was built. +This aims to be a simpler design which gets the static semantics right first, +and only then is extended to be more practical. + +Types +----- + +There are two TYPES in Sixtypical: + +* bit (2 possible values) +* byte (256 possible values) + +Memory locations +---------------- + +The primary concept in Sixtypical is the MEMORY LOCATION. At any given point +in time during execution, each memory location is either UNINITIALIZED or +INITIALIZED. At any given point in the program text, too, each memory +location is either uninitialized or initialized. Where-ever it is one or +the other during execution, it is the same in the corresponding place in +the program text; thus, it is a static property. + +(There is actually a third state, WRITTEN, which indicates that the memory +location is not only initialized, but also that it has been written to in +the current routine.) + +There are four general kinds of memory location. The first three are +pre-defined and built-in. + +### Registers ### + +Each of these hold a byte. They are initially uninitialized. + + a + x + y + +### Flags ### + +Each of these hold a bit. They are initially uninitialized. + + c (carry) + z (zero) + v (overflow) + n (negative) + +### Constants ### + +It may be strange to think of constants as memory locations, but keep in mind +that a memory location in Sixtypical need not map to a memory location in the +underlying hardware. All constants are read-only. Each is +initially initialized with the value that corresponds with its name. + +They come in bit and byte types. There are two bit constants, + + off + on + +and two-hundred and fifty-six byte constants, + + 0 + 1 + ... + 255 + +### User-defined ### + +There may be any number of user-defined memory locations. They are defined +by giving the type, which must be `byte`, and the name. + + byte pos + +Routines +-------- + +Every routine must list all the memory locations it READS from, i.e. its +INPUTS, and all the memory locations it WRITES to, whether they are OUTPUTS +or merely TRASHED. Every memory location that is not written to is PRESERVED. + + routine foo + inputs a, score + outputs x + trashes y { + ... + } + +Routines may call only routines previously defined in the program source. +Thus, recursive routines are not allowed. + +There must be one routine called `main`. This routine is executed when +the program is run. + +Instructions +------------ + +### ld ### + + ld , + +Reads from src and writes to dest. + +* It is illegal if dest is not a register. +* It is illegal if dest does not occur in the WRITES list of the current + routine. +* It is illegal if src is not of same type as dest (i.e., is not a byte.) +* It is illegal if src is uninitialized. +* It is illegal if src does not either: + * be a constant, or + * occur in the READS list of the current routine, or + * occur in the WRITES list of the current routine AND + that location has previously been written inside this routine. + +After execution, dest is considered initialized. The flags `z` and `n` may be +changed by this instruction, and they are considered initialized after it has +executed. + +Some combinations, such as `ld x, y`, are illegal because they do not map to +underlying opcodes. + +Notes: + + ld a, 123 → LDA #123 + ld a, lives → LDA LIVES + ld x, 123 → LDX #123 + ld x, lives → LDX LIVES + ld y, 123 → LDY #123 + ld y, lives → LDY LIVES + ld x, a → TAX + ld y, a → TAY + ld a, x → TXA + ld a, y → TYA + +### st ### + + st , + +Reads from src and writes to dest. + +* It is illegal if dest is a register or if dest is read-only. +* It is illegal if dest does not occur in the WRITES list of the current + routine. +* It is illegal if src is not of same type as dest. +* It is illegal if src is uninitialized. +* It is illegal if src does not either: + * be a constant, or + * occur in the READS list of the current routine, or + * occur in the WRITES list of the current routine AND + that location has previously been written inside this routine. + +After execution, dest is considered initialized. No flags are +changed by this instruction (unless of course dest is a flag.) + +Notes: + + st a, lives → STA LIVES + st x, lives → STX LIVES + st y, lives → STY LIVES + st on, c → SEC + st off, c → CLC + +### add dest, src ### + + add , + +Adds the contents of src to dest and stores the result in dest. + +* It is illegal if src OR dest OR c is uninitialized. +* It is illegal if dest is read-only. +* It is illegal if dest does not occur in the WRITES AND READS lists + of the current routine. +* It is illegal if src does not either: + * be a constant, or + * occur in the READS list of the current routine, or + * occur in the WRITES list of the current routine AND + that location has previously been written inside this routine. + +Affects n, z, c, and v flags. + +dest continues to be initialized afterwards. + +Notes: + + add a, delta → ADC DELTA + add a, 1 → ADC #1 + +### inc ### + +TODO: these do not honour carry! + + inc x → INX + inc y → INY + inc lives → INC LIVES + +### sub ### + + sub , + +Subtracts the contents of src from dest and stores the result in dest. + +The constraints and effects are exactly the same as for `add`. + +Notes: + + sub a, delta → SBC DELTA + sub a, 1 → SBC #1 + +### dec ### + +TODO: these do not honour carry! + + dec x → DEX + dec y → DEY + dec lives → DEC LIVES + +### cmp ### + + cmp , + +Subtracts the contents of src from dest, but does not store the result. + +The constraints and effects are the same as for `sub`, except that `c` +need not be initialized before executing `cmp`, and the `v` flag is +unaffected. + +Notes: + + cmp a, delta → CMP DELTA + cmp a, 1 → CMP #1 + cmp x, 1 → CPX #1 + cmp y, 1 → CPY #1 + +### and ### + + and , + +"AND"s the contents of src with dest and stores the result in dest. + +The constraints are the same as for `cmp`, except that the `c` flag +is not affected. i.e. only `n` and `z` flags are affected. + +Notes: + + and a, 8 → AND #8 + +### or ### + + or , + +"OR"s the contents of src with dest and stores the result in dest. + +The constraints and effects are exactly the same as for `and`. + +Notes: + + or a, 8 → ORA #8 + +### xor ### + + xor , + +"XOR"s the contents of src with dest and stores the result in dest. + +The constraints and effects are exactly the same as for `and`. + +Notes: + + xor a, 8 → EOR #8 + +### shl ### + + shl + +Shifts the dest left one bit position. The rightmost position becomes `c`, +and `c` becomes the bit that was shifted off the left. + +* It is illegal if dest is a register besides `a`. +* It is illegal if dest is read-only. +* It is illegal if dest OR c is uninitialized. +* It is illegal if dest does not occur in the WRITES AND READS lists + of the current routine. + +Notes: + + shl a → ROL A + shl lives → ROL LIVES + +### shr ### + + shr + +Shifts the dest right one bit position. The leftmost position becomes `c`, +and `c` becomes the bit that was shifted off the right. + +Constraints are exactly the same as for `shl`. + +Notes: + + shr a → ROR A + shr lives → ROR LIVES + +### call ### + + call + +Just before the call, + +* It is illegal if any of the memory locations in the routine's READS list is + uninitialized. + +Just after the call, + +* All memory locations listed as TRASHED in the routine's WRITES list are + considered uninitialized. + +Notes: + + call routine → JSR ROUTINE + +### if ### + + if (bit) { + true-branch + } else { + false-branch + } + +_bit_ is usually one of the flags, z or c. + +Notes: + + BEQ Branch on Result Zero + BMI Branch on Result Minus + BNE Branch on Result not Zero + BPL Branch on Result Plus + BCC Branch on Carry Clear + BCS Branch on Carry Set + BVC Branch on Overflow Clear + BVS Branch on Overflow Set + + +- - - - + +Grammar +------- + + Program ::= {Defn} {Routine}. + Defn ::= "byte" NewIdent. + Routine ::= "routine" NewIdent + ["inputs" LocExprs] ["outputs" LocExprs] ["trashes" LocExprs] + Block. + LocExprs::= LocExpr {"," LocExpr}. + LocExpr ::= Register | Flag | Const | DefnIdent. + Register::= "a" | "x" | "y". + Flag ::= "c" | "z" | "n" | "v". + Const ::= "0" ... "255". + Block ::= "{" {Instr} "}". + Instr ::= "ld" LocExpr "," LocExpr + | "st" LocExpr "," LocExpr + | "add" LocExpr "," LocExpr + | "sub" LocExpr "," LocExpr + | "cmp" LocExpr "," LocExpr + | "and" LocExpr "," LocExpr + | "or" LocExpr "," LocExpr + | "xor" LocExpr "," LocExpr + | "shl" LocExpr + | "shr" LocExpr + | "inc" LocExpr + | "dec" LocExpr + | "call" RoutineIdent + | "if" LocExpr Block ["else" Block]. + + +### 6502 instructions unsupported ### + + ASL Shift Left One Bit (Memory or Accumulator) + LSR Shift Right One Bit (Memory or Accumulator) + + BIT Test Bits in Memory with Accumulator + BRK Force Break + + CLD Clear Decimal Mode + CLI Clear interrupt Disable Bit + CLV Clear Overflow Flag + + NOP No Operation + + JMP Jump to New Location // but may be generated as part of `if` + + PHA Push Accumulator on Stack + PHP Push Processor Status on Stack + PLA Pull Accumulator from Stack + PLP Pull Processor Status from Stack + + RTI Return from Interrupt + RTS Return from Subroutine + + SED Set Decimal Mode + SEI Set Interrupt Disable Status + + TSX Transfer Stack Pointer to Index X + TXS Transfer Index X to Stack Pointer diff --git a/eg/add-fail.60p b/eg/add-fail.60p new file mode 100644 index 0000000..ee9e034 --- /dev/null +++ b/eg/add-fail.60p @@ -0,0 +1,6 @@ +routine add_four + inputs a + outputs a +{ + add a, 4 +} diff --git a/eg/add-pass.60p b/eg/add-pass.60p new file mode 100644 index 0000000..03873fa --- /dev/null +++ b/eg/add-pass.60p @@ -0,0 +1,8 @@ +routine add_four + inputs a + outputs a + trashes c +{ + st off, c + add a, 4 +} diff --git a/eg/cinv.60p b/eg/cinv.60p deleted file mode 100644 index 7ebe503..0000000 --- a/eg/cinv.60p +++ /dev/null @@ -1,15 +0,0 @@ -assign byte screen 1024 -assign vector cinv 788 -reserve vector save_cinv - -routine main { - with sei { - copy cinv save_cinv - copy routine our_cinv to cinv - } -} - -routine our_cinv { - inc screen - jmp (save_cinv) -} diff --git a/eg/demo.60p b/eg/demo.60p deleted file mode 100644 index 8c0b1a7..0000000 --- a/eg/demo.60p +++ /dev/null @@ -1,110 +0,0 @@ -assign byte[256] screen $0400 -assign byte[256] screen2 1274 -assign byte[256] screen3 1524 -assign byte[256] screen4 1774 - -assign byte[256] colormap 55296 -assign byte[256] colormap2 55546 -assign byte[256] colormap3 55796 -assign byte[256] colormap4 56046 - -assign byte vic_border 53280 -assign byte[4] vic_bg 53281 - -assign vector cinv 788 -reserve vector save_cinv - -assign word position $fb - -reserve byte value - -reserve word m -reserve word n - -routine reset_position { - lda #$00 - sta position -} - -routine increment_pos { - clc - lda position - adc #0 - sta >position -} - -routine compare_16_bit { - lda >m - cmp >n - if beq { - lda position - sta >m - lda #$07 - sta >n - lda #$e8 - sta delta - } else { - // copy #40 delta - lda #40 - sta delta - } -} - -routine calculate_new_position outputs (new_position) { - clc - lda position - adc >delta - sta >new_position -} - -routine compare_new_pos outputs (.c) { - lda >new_position - cmp >compare_target - if beq { - lda delta - ldx joy2 - txa - and #1 // up - if beq { - lda #216 // -40 - sta delta - } else { - txa - and #2 // down - if beq { - lda #40 - sta delta - } else { - txa - and #8 // right - if beq { - lda #1 - sta actor_pos, y - txa - copy .a actor_delta, y - copy #40 tmp.oph || exit 1 -cat lib/basic_header.oph tmp.oph > tmp2.oph || exit 1 -ophis tmp2.oph -o tmp.prg || exit 1 -x64 -joydev2 1 tmp.prg -rm -f tmp.oph tmp2.oph tmp.prg diff --git a/src/Main.hs b/src/Main.hs deleted file mode 100644 index 4459c67..0000000 --- a/src/Main.hs +++ /dev/null @@ -1,48 +0,0 @@ --- encoding: UTF-8 - -module Main where - -import System.IO -import System.Environment -import System.Exit - -import SixtyPical.Model -import SixtyPical.Parser (parseProgram) -import SixtyPical.Checker (checkAndTransformProgram) -import SixtyPical.Analyzer (analyzeProgram) -import SixtyPical.Context (ppAnalysis) -import SixtyPical.Emitter (emitProgram) - --- -- -- -- driver -- -- -- -- - -usage = do - putStrLn "Usage: sixtypical (parse|check|analyze|emit) filename.60p" - exitWith $ ExitFailure 1 - -main = do - args <- getArgs - case args of - [verb, filename] -> do - programText <- readFile filename - case (verb, parseProgram programText) of - ("parse", Right program) -> do - putStrLn $ show $ program - ("check", Right program) -> do - case checkAndTransformProgram program of - Just newprog -> - putStrLn $ programSummary newprog - ("analyze", Right program) -> - case checkAndTransformProgram program of - Just newprog -> - ppAnalysis newprog (analyzeProgram newprog) - ("emit", Right program) -> - case checkAndTransformProgram program of - Just newprog -> - case (length (show (analyzeProgram newprog)) < 9999999) of - True -> - putStr $ emitProgram newprog - (_, Left problem) -> do - hPutStrLn stderr (show problem) - exitWith $ ExitFailure 1 - (_, _) -> usage - _ -> usage diff --git a/src/SixtyPical/Analyzer.hs b/src/SixtyPical/Analyzer.hs deleted file mode 100644 index fff6e6d..0000000 --- a/src/SixtyPical/Analyzer.hs +++ /dev/null @@ -1,176 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Analyzer where - -import qualified Data.Map as Map - -import SixtyPical.Model -import SixtyPical.Context - --- -- -- -- abstract interpreter -- -- -- -- - -analyzeProgram program@(Program decls routines) = - checkRoutines routines Map.empty - where - checkRoutines [] progCtx = progCtx - checkRoutines (rout@(Routine name outputs _) : routs) progCtx = - let - routCtx = Map.empty - routAnalysis = checkRoutine rout progCtx routCtx - progCtx' = Map.insert name routAnalysis progCtx - in - checkRoutines routs progCtx' - - checkRoutine (Routine name outputs instrs) progCtx routCtx = - checkBlock name instrs progCtx routCtx - - checkBlock nm (Block decls instrs) progCtx routCtx = - checkInstrs nm instrs progCtx routCtx - - checkInstrs nm [] progCtx routCtx = routCtx - checkInstrs nm (instr:instrs) progCtx routCtx = - let - routCtx' = checkInstr nm instr progCtx routCtx - in - checkInstrs nm instrs progCtx routCtx' - - -- -- -- -- -- -- -- -- -- -- -- -- - - checkInstr nm (COPY src dst) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - checkInstr nm (DELTA dst val) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith (Immediate val)) routCtx - checkInstr nm (ADD dst src) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - checkInstr nm (SUB dst src) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - - checkInstr nm (AND dst src) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - checkInstr nm (OR dst src) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - checkInstr nm (XOR dst src) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith src) routCtx - - checkInstr nm (JSR name) progCtx routCtx = - case lookupRoutine program name of - Just calledRout -> - case Map.lookup name progCtx of - Just calledRoutCtx -> - mergeRoutCtxs nm routCtx calledRoutCtx calledRout - Nothing -> - error ("can't call routine '" ++ name ++ "' before it is defined") - Nothing -> - -- it must be an external. - -- TODO: merge in any poisoning/outputs that are declared - -- on the external. for now, - routCtx - - checkInstr nm (CMP reg addr) progCtx routCtx = - -- TODO: mark Carry bit as "touched" here - routCtx - checkInstr nm (IF _ branch b1 b2) progCtx routCtx = - let - routCtx1 = checkBlock nm b1 progCtx routCtx - routCtx2 = checkBlock nm b2 progCtx routCtx - in - mergeAlternateRoutCtxs nm routCtx1 routCtx2 - checkInstr nm (REPEAT _ branch blk) progCtx routCtx = - -- we analyze the block twice, to simulate it being - -- repeated. (see tests for a test case on this.) - let - routCtx' = checkBlock nm blk progCtx routCtx - routCtx'' = checkBlock nm blk progCtx routCtx' - in - routCtx'' - - -- TODO -- THESE ARE WEAK -- - checkInstr nm (WITH _ blk) progCtx routCtx = - checkBlock nm blk progCtx routCtx - - checkInstr nm (BIT dst) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith (Immediate 0)) routCtx - - checkInstr nm (SHR dst flg) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith flg) routCtx - checkInstr nm (SHL dst flg) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith flg) routCtx - - checkInstr nm (COPYROUTINE name dst) progCtx routCtx = - updateRoutCtxPoison nm dst (UpdatedWith (Immediate 7)) routCtx - - checkInstr nm (JMPVECTOR dst) progCtx routCtx = - routCtx - - checkInstr nm NOP progCtx routCtx = - routCtx - - checkInstr nm instr _ _ = error ( - "Internal error: sixtypical doesn't know how to " ++ - "analyze '" ++ (show instr) ++ "' in '" ++ nm ++ "'") - --- --- Utility function: --- Take 2 routine contexts -- the current routine and a routine that was just --- JSR'ed to (immediately previously) -- and merge them to create a new --- context for the current routine. --- --- This can't, by itself, cause a poisoning error. --- So we use a weaker version of updateRoutCtx to build the merged context. --- -mergeRoutCtxs nm routCtx calledRoutCtx calledRout@(Routine name outputs _) = - let - -- go through all the Usages in the calledRoutCtx - -- insert any that were updated, into routCtx - poison location usage routCtxAccum = - case usage of - UpdatedWith ulocation -> - case location `elem` outputs of - True -> - updateRoutCtx nm location usage routCtxAccum - False -> - updateRoutCtx nm location (PoisonedWith ulocation) routCtxAccum - PoisonedWith ulocation -> - updateRoutCtx nm location usage routCtxAccum - in - foldrWithKey (poison) routCtx calledRoutCtx - where - -- for Hugs Sep2006, which doesn't have Map.foldrWithKey - foldrWithKey f z = foldr (uncurry f) z . Map.toAscList - --- --- Utility function: --- Take 2 routine contexts -- one from each branch of an `if` -- and merge --- them to create a new context for the remainder of the routine. --- --- We use a weaker version of updateRoutCtx to build the merged context. --- We do this because accessing a poisoned storage location from either --- of the branch contexts is not an error at the merge point -- we simply --- make the storage location poisoned in the resulting context. (If the --- poisoned location is accessed subsequently to the merge point, that is --- of course still an error.) --- -mergeAlternateRoutCtxs nm routCtx1 routCtx2 = - let - -- go through all the Usages in routCtx2 - -- insert any that were updated, into routCtx1 - poison location usage2 routCtxAccum = - case Map.lookup location routCtx1 of - Nothing -> - updateRoutCtx nm location usage2 routCtxAccum - Just usage1 -> - -- it exists in both routCtxs. - -- if it is poisoned in either, it's poisoned here. - -- otherwise, it is OK to differ. - let - newUsage = case (usage1, usage2) of - (PoisonedWith _, _) -> usage1 - (_, PoisonedWith _) -> usage2 - _ -> usage1 -- or 2. doesn't matter. - in - updateRoutCtx nm location newUsage routCtxAccum - in - foldrWithKey (poison) routCtx1 routCtx2 - where - -- for Hugs Sep2006, which doesn't have Map.foldrWithKey - foldrWithKey f z = foldr (uncurry f) z . Map.toAscList diff --git a/src/SixtyPical/Checker.hs b/src/SixtyPical/Checker.hs deleted file mode 100644 index 74feaa8..0000000 --- a/src/SixtyPical/Checker.hs +++ /dev/null @@ -1,86 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Checker where - -import SixtyPical.Model -import SixtyPical.Transformer - -allTrue = foldl (&&) True - -trueOrDie message test = - if test then True else error message - -isUnique [] = True -isUnique (x:xs) = (not (x `elem` xs)) && isUnique xs - --- -- - -noDuplicateDecls program = - isUnique $ declaredLocationNames program - -noDuplicateRoutines program = - isUnique $ declaredRoutineNames program - --- wow. efficiency is clearly our watchword --- (and sarcasm is our backup watchword) -noIndexedAccessOfNonTables p@(Program decls routines) = - let - mappedProgram = mapProgramRoutines (checkInstr) p - in - mappedProgram == p - where - checkInstr j@(COPY _ (Indexed (NamedLocation sz g) reg)) = - case lookupDecl p g of - Just (Assign _ (Table _ _) _) -> j - Just (Reserve _ (Table _ _) _) -> j - Just _ -> (COPY A A) - Nothing -> (COPY A A) - checkInstr other = other - -noUseOfUndeclaredRoutines p@(Program decls routines) = - let - undeclaredRoutines = foldProgramRoutines (checkInstr) 0 p - in - undeclaredRoutines == 0 - where - routineNames = declaredRoutineNames p - -- TODO also check COPYROUTINE here - checkInstr j@(JSR routName) acc = - case routName `elem` routineNames of - True -> acc - False -> error ("undeclared routine '" ++ routName ++ "'") -- acc + 1 - checkInstr other acc = acc - -consistentInitialTableSizes p@(Program decls routines) = - let - inconsistentTableSizes = foldProgramDecls (checkDecl) 0 p - in - inconsistentTableSizes == 0 - where - checkDecl (Reserve _ (Table _ sz) []) acc = acc - checkDecl (Reserve _ (Table _ sz) vals) acc = - case sz == (length vals) of - True -> acc - False -> acc + 1 - checkDecl _ acc = acc - --- - - - - - - - -checkAndTransformProgram :: Program -> Maybe Program -checkAndTransformProgram program = - if - trueOrDie ("missing 'main' routine: " ++ show program) (routineDeclared "main" program) && - trueOrDie "duplicate location name" (noDuplicateDecls program) && - trueOrDie "duplicate routine name" (noDuplicateRoutines program) && - trueOrDie "undeclared routine" (noUseOfUndeclaredRoutines program) && - trueOrDie "indexed access of non-table" (noIndexedAccessOfNonTables program) && - trueOrDie "initial table incorrect size" (consistentInitialTableSizes program) - then - let - program' = numberProgramLoops program - program'' = renameBlockDecls program' - program''' = liftBlockDecls program'' - program'''' = fillOutNamedLocationTypes program''' - in - Just program'''' - else Nothing diff --git a/src/SixtyPical/Context.hs b/src/SixtyPical/Context.hs deleted file mode 100644 index de4d3f0..0000000 --- a/src/SixtyPical/Context.hs +++ /dev/null @@ -1,93 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Context where - --- contexts for abstract interpretation. - -import qualified Data.Map as Map - -import SixtyPical.Model - --- --- The result of analyzing an instruction (or a block) is a map from --- all relevant StorageLocations to how those StorageLocations were --- used in that code (a Usage.) --- --- If a StorageLocation is missing from the map, we can assume that --- that code does not affect that StorageLocation (it is "retained".) --- - -data Usage = PoisonedWith StorageLocation - | UpdatedWith StorageLocation - | NotChanged - deriving (Show, Ord, Eq) - -type RoutineContext = Map.Map StorageLocation Usage - -type ProgramContext = Map.Map RoutineName RoutineContext - -untypedLocation (HighByteOf x) = - untypedLocation x -untypedLocation (LowByteOf x) = - untypedLocation x -untypedLocation (Indexed table index) = - untypedLocation table -untypedLocation (IndirectIndexed word index) = - IndirectIndexed (untypedLocation word) index -untypedLocation (NamedLocation _ name) = - NamedLocation Nothing name -untypedLocation x = x - -updateRoutCtxPoison :: String -> StorageLocation -> Usage -> RoutineContext -> RoutineContext -updateRoutCtxPoison nm dst (UpdatedWith src) routCtx = - let - s = untypedLocation src - d = untypedLocation dst - in - case Map.lookup s routCtx of - Just (PoisonedWith _) -> - error ("routine '" ++ nm ++ "' does not preserve '" ++ - (show s) ++ "' (in context: " ++ (show routCtx) ++ ")") - _ -> - Map.insert d (UpdatedWith s) routCtx -updateRoutCtxPoison nm dst (PoisonedWith src) routCtx = - Map.insert (untypedLocation dst) (PoisonedWith $ untypedLocation src) routCtx - -updateRoutCtx nm dst (UpdatedWith src) routCtx = - let - s = untypedLocation src - d = untypedLocation dst - in - Map.insert d (UpdatedWith s) routCtx -updateRoutCtx nm dst (PoisonedWith src) routCtx = - Map.insert (untypedLocation dst) (PoisonedWith $ untypedLocation src) routCtx - --- pretty printing - -ppAnalysis :: Program -> ProgramContext -> IO () -ppAnalysis program progCtx = - let - li = Map.toList progCtx - in do - ppRoutines program li - -ppRoutines program [] = return () -ppRoutines program ((name, routCtx):rest) = - let - Just (Routine rname outputs _) = lookupRoutine program name - in do - putStrLn (rname ++ " (" ++ (show outputs) ++ ")") - ppRoutine routCtx - putStrLn "" - ppRoutines program rest - -ppRoutine routCtx = - let - li = Map.toList routCtx - in do - ppUsages li - -ppUsages [] = return () -ppUsages ((loc, usage):rest) = do - putStrLn $ (" " ++ (show loc) ++ ": " ++ (show usage)) - ppUsages rest diff --git a/src/SixtyPical/Emitter.hs b/src/SixtyPical/Emitter.hs deleted file mode 100644 index d2898a0..0000000 --- a/src/SixtyPical/Emitter.hs +++ /dev/null @@ -1,281 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Emitter where - -import Data.Bits - -import SixtyPical.Model - -emitProgram p@(Program decls routines) = - let - mains = filter (\(Routine name _ _) -> name == "main") routines - allElse = filter (\(Routine name _ _) -> name /= "main") routines - initializedDecls = filter (\d -> isInitializedDecl d) decls - uninitializedDecls = filter (\d -> not $ isInitializedDecl d) decls - in - emitRoutines p mains ++ - emitRoutines p allElse ++ - emitDecls p initializedDecls ++ - (case uninitializedDecls of - [] -> "" - _ -> ".data\n" ++ emitDecls p uninitializedDecls) - -emitDecls _ [] = "" -emitDecls p (decl:decls) = - emitDecl p decl ++ "\n" ++ emitDecls p decls - -emitDecl p (Assign name _ addr) = ".alias " ++ name ++ " " ++ (show addr) -emitDecl p (Reserve name typ [val]) - | typ == Byte = name ++ ": .byte " ++ (show val) - | typ == Word = name ++ ": .word " ++ (show val) - | typ == Vector = name ++ ": .word " ++ (show val) - -emitDecl p (Reserve name (Table Byte size) []) = - ".space " ++ name ++ " " ++ (show size) - -emitDecl p (Reserve name (Table Byte size) vals) = - name ++ ": .byte " ++ (showList vals) - where - showList [] = "" - showList [val] = show val - showList (val:vals) = (show val) ++ ", " ++ (showList vals) - -emitDecl p (Reserve name (Table typ size) []) - | typ == Word || typ == Vector = - ".space " ++ name ++ "_lo " ++ (show size) ++ "\n" ++ - ".space " ++ name ++ "_hi " ++ (show size) - -emitDecl p (Reserve name typ []) - | typ == Byte = ".space " ++ name ++ " 1" - | typ == Word = ".space " ++ name ++ " 2" - | typ == Vector = ".space " ++ name ++ " 2" - -emitDecl p (External name addr) = ".alias " ++ name ++ " " ++ (show addr) -emitDecl p d = error ( - "Internal error: sixtypical doesn't know how to " ++ - "emit assembler code for '" ++ (show d) ++ "'") - -emitRoutines _ [] = "" -emitRoutines p (rout:routs) = - emitRoutine p rout ++ "\n" ++ emitRoutines p routs - -emitRoutine p r@(Routine name _ block) = - name ++ ":\n" ++ emitBlock p r block ++ " rts\n" - -emitBlock p r (Block decls instrs) = - emitInstrs p r instrs - -emitInstrs _ _ [] = "" -emitInstrs p r (instr:instrs) = - " " ++ emitInstr p r instr ++ "\n" ++ emitInstrs p r instrs - -emitInstr p r (COPY (Immediate val) A) = "lda #" ++ (show val) -emitInstr p r (COPY (Immediate val) X) = "ldx #" ++ (show val) -emitInstr p r (COPY (Immediate val) Y) = "ldy #" ++ (show val) - -emitInstr p r (COPY (Immediate 0) FlagC) = "clc" -emitInstr p r (COPY (Immediate 0) FlagD) = "cld" -emitInstr p r (COPY (Immediate 0) FlagV) = "clv" -emitInstr p r (COPY (Immediate 1) FlagC) = "sec" -emitInstr p r (COPY (Immediate 1) FlagD) = "sed" - -emitInstr p r (COPY A (NamedLocation st label)) = "sta " ++ label -emitInstr p r (COPY X (NamedLocation st label)) = "stx " ++ label -emitInstr p r (COPY Y (NamedLocation st label)) = "sty " ++ label -emitInstr p r (COPY (NamedLocation st label) A) = "lda " ++ label -emitInstr p r (COPY (NamedLocation st label) X) = "ldx " ++ label -emitInstr p r (COPY (NamedLocation st label) Y) = "ldy " ++ label - -emitInstr p r (COPY (LowByteOf (NamedLocation st label)) A) = "lda " ++ label -emitInstr p r (COPY (HighByteOf (NamedLocation st label)) A) = "lda " ++ label ++ "+1" - -emitInstr p r (COPY A (LowByteOf (NamedLocation st label))) = "sta " ++ label -emitInstr p r (COPY A (HighByteOf (NamedLocation st label))) = "sta " ++ label ++ "+1" - -emitInstr p r (COPY A X) = "tax" -emitInstr p r (COPY A Y) = "tay" -emitInstr p r (COPY X A) = "txa" -emitInstr p r (COPY Y A) = "tya" - -emitInstr p r (COPY A (Indexed (NamedLocation (Just (Table Byte _)) label) X)) = "sta " ++ label ++ ", x" -emitInstr p r (COPY A (Indexed (NamedLocation (Just (Table Byte _)) label) Y)) = "sta " ++ label ++ ", y" - -emitInstr p r (COPY (Indexed (NamedLocation (Just (Table Byte _)) label) X) A) = "lda " ++ label ++ ", x" -emitInstr p r (COPY (Indexed (NamedLocation (Just (Table Byte _)) label) Y) A) = "lda " ++ label ++ ", y" - -emitInstr p r (COPY (NamedLocation (Just st1) src) (Indexed (NamedLocation (Just (Table st2 _)) dst) reg)) - | (st1 == Vector && st2 == Vector) || (st1 == Word && st2 == Word) = - "lda " ++ src ++ "\n" ++ - " sta " ++ dst ++ "_lo, " ++ (regName reg) ++ "\n" ++ - " lda " ++ src ++ "+1\n" ++ - " sta " ++ dst ++ "_hi, " ++ (regName reg) - -emitInstr p r (COPY (NamedLocation (Just Byte) src) - (LowByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "lda " ++ src ++ "\n" ++ - " sta " ++ dst ++ "_lo, " ++ (regName reg) - -emitInstr p r (COPY (NamedLocation (Just Byte) src) - (HighByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "lda " ++ src ++ "\n" ++ - " sta " ++ dst ++ "_hi, " ++ (regName reg) - -emitInstr p r (COPY (Immediate value) - (LowByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "lda #" ++ (show value) ++ "\n" ++ - " sta " ++ dst ++ "_lo, " ++ (regName reg) - -emitInstr p r (COPY (Immediate value) - (HighByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "lda #" ++ (show value) ++ "\n" ++ - " sta " ++ dst ++ "_hi, " ++ (regName reg) - -emitInstr p r (COPY A - (LowByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "sta " ++ dst ++ "_lo, " ++ (regName reg) - -emitInstr p r (COPY A - (HighByteOf (Indexed (NamedLocation (Just (Table Word _)) dst) reg))) = - "sta " ++ dst ++ "_hi, " ++ (regName reg) - -emitInstr p r (COPY (LowByteOf (Indexed (NamedLocation (Just (Table Word _)) src) reg)) - (NamedLocation (Just Byte) dst)) = - "lda " ++ src ++ "_lo, " ++ (regName reg) ++ "\n" ++ - " sta " ++ dst - -emitInstr p r (COPY (HighByteOf (Indexed (NamedLocation (Just (Table Word _)) src) reg)) - (NamedLocation (Just Byte) dst)) = - "lda " ++ src ++ "_hi, " ++ (regName reg) ++ "\n" ++ - " sta " ++ dst - -emitInstr p r (COPY (Indexed (NamedLocation (Just (Table st1 _)) src) reg) (NamedLocation (Just st2) dst)) - | (st1 == Vector && st2 == Vector) || (st1 == Word && st2 == Word) = - "lda " ++ src ++ "_lo, " ++ (regName reg) ++ "\n" ++ - " sta " ++ dst ++ "\n" ++ - " lda " ++ src ++ "_hi, " ++ (regName reg) ++ "\n" ++ - " sta " ++ dst ++ "+1" - -emitInstr p r (COPY A (IndirectIndexed (NamedLocation st label) Y)) = "sta (" ++ label ++ "), y" -emitInstr p r (COPY (IndirectIndexed (NamedLocation st label) Y) A) = "lda (" ++ label ++ "), y" - -emitInstr p r (COPY (NamedLocation (Just st1) src) (NamedLocation (Just st2) dst)) - | (st1 == Vector && st2 == Vector) || (st1 == Word && st2 == Word) = - "lda " ++ src ++ "\n" ++ - " sta " ++ dst ++ "\n" ++ - " lda " ++ src ++ "+1\n" ++ - " sta " ++ dst ++ "+1" - -emitInstr p r (COPY (Immediate v) (NamedLocation (Just st) dst)) - | st == Byte = - "lda #" ++ (show v) ++ "\n" ++ - " sta " ++ dst - | st == Word = - let - low = v .&. 255 - high = (shift v (-8)) .&. 255 - in - "lda #" ++ (show low) ++ "\n" ++ - " sta " ++ dst ++ "\n" ++ - " lda #" ++ (show high) ++ "\n" ++ - " sta " ++ dst ++ "+1" - -emitInstr p r (CMP A (NamedLocation st label)) = "cmp " ++ label -emitInstr p r (CMP X (NamedLocation st label)) = "cpx " ++ label -emitInstr p r (CMP Y (NamedLocation st label)) = "cpy " ++ label - -emitInstr p r (CMP A (Immediate val)) = "cmp #" ++ (show val) -emitInstr p r (CMP X (Immediate val)) = "cpx #" ++ (show val) -emitInstr p r (CMP Y (Immediate val)) = "cpy #" ++ (show val) - -emitInstr p r (CMP A (LowByteOf (NamedLocation st label))) = "cmp " ++ label -emitInstr p r (CMP A (HighByteOf (NamedLocation st label))) = "cmp " ++ label ++ "+1" - -emitInstr p r (ADD A (NamedLocation st label)) = "adc " ++ label -emitInstr p r (ADD A (Immediate val)) = "adc #" ++ (show val) - -emitInstr p r (ADD A (LowByteOf (NamedLocation st label))) = "adc " ++ label -emitInstr p r (ADD A (HighByteOf (NamedLocation st label))) = "adc " ++ label ++ "+1" - -emitInstr p r (AND A (NamedLocation st label)) = "and " ++ label -emitInstr p r (AND A (Immediate val)) = "and #" ++ (show val) - -emitInstr p r (SUB A (NamedLocation st label)) = "sbc " ++ label -emitInstr p r (SUB A (Immediate val)) = "sbc #" ++ (show val) - -emitInstr p r (OR A (NamedLocation st label)) = "ora " ++ label -emitInstr p r (OR A (Immediate val)) = "ora #" ++ (show val) - -emitInstr p r (XOR A (NamedLocation st label)) = "eor " ++ label -emitInstr p r (XOR A (Immediate val)) = "eor #" ++ (show val) - -emitInstr p r (SHL A (Immediate 0)) = "asl" -emitInstr p r (SHL (NamedLocation st label) (Immediate 0)) = "asl " ++ label -emitInstr p r (SHR A (Immediate 0)) = "lsr" -emitInstr p r (SHR (NamedLocation st label) (Immediate 0)) = "lsr " ++ label -emitInstr p r (SHL A FlagC) = "rol" -emitInstr p r (SHL (NamedLocation st label) FlagC) = "rol " ++ label -emitInstr p r (SHR A FlagC) = "ror" -emitInstr p r (SHR (NamedLocation st label) FlagC) = "ror " ++ label - -emitInstr p r (BIT (NamedLocation st label)) = "bit " ++ label - -emitInstr p r (DELTA X 1) = "inx" -emitInstr p r (DELTA X (-1)) = "dex" -emitInstr p r (DELTA Y 1) = "iny" -emitInstr p r (DELTA Y (-1)) = "dey" -emitInstr p r (DELTA (NamedLocation st label) 1) = "inc " ++ label -emitInstr p r (DELTA (NamedLocation st label) (-1)) = "dec " ++ label - -emitInstr p r (IF iid branch b1 b2) = - (show branch) ++ " _label_" ++ (show iid) ++ "\n" ++ - emitBlock p r b2 ++ - " jmp _past_" ++ (show iid) ++ "\n" ++ - "_label_" ++ (show iid) ++ ":\n" ++ - emitBlock p r b1 ++ - "_past_" ++ (show iid) ++ ":" - -emitInstr p r (REPEAT iid branch blk) = - "\n_repeat_" ++ (show iid) ++ ":\n" ++ - emitBlock p r blk ++ - " " ++ (show branch) ++ " _repeat_" ++ (show iid) - -emitInstr p r (WITH SEI blk) = - "sei\n" ++ - emitBlock p r blk ++ - " cli" - -emitInstr p r (WITH (PUSH A) blk) = - "pha\n" ++ - emitBlock p r blk ++ - " pla" - -emitInstr p r (WITH (PUSH AllFlags) blk) = - "php\n" ++ - emitBlock p r blk ++ - " plp" - -emitInstr p r (COPYROUTINE src (NamedLocation (Just Vector) dst)) = - "lda #<" ++ src ++ "\n" ++ - " sta " ++ dst ++ "\n" ++ - " lda #>" ++ src ++ "\n" ++ - " sta " ++ dst ++ "+1" - -emitInstr p r (COPYROUTINE src (Indexed (NamedLocation (Just (Table Vector _)) dst) reg)) = - "lda #<" ++ src ++ "\n" ++ - " sta " ++ dst ++ "_lo, " ++ (regName reg) ++ "\n" ++ - " lda #>" ++ src ++ "\n" ++ - " sta " ++ dst ++ "_hi, " ++ (regName reg) - -emitInstr p r (JMPVECTOR (NamedLocation (Just Vector) dst)) = - "jmp (" ++ dst ++ ")" - -emitInstr p r (JSR routineName) = - "jsr " ++ routineName - -emitInstr p r i = error ( - "Internal error: sixtypical doesn't know how to " ++ - "emit assembler code for '" ++ (show i) ++ "'") - - -regName X = "x" -regName Y = "y" diff --git a/src/SixtyPical/Model.hs b/src/SixtyPical/Model.hs deleted file mode 100644 index 647e485..0000000 --- a/src/SixtyPical/Model.hs +++ /dev/null @@ -1,204 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Model where - --- -- -- -- machine model -- -- -- -- - -type DataValue = Int -- LET'S ASSUME THIS IS AT LEAST 8 BITS -type Address = Int -- LET'S ASSUME THIS IS AT LEAST 16 BITS - -type InternalID = Int -- for numbering labels for if/repeat - -type LocationName = String - --- We do not include the PC as it of course changes constantly. --- We do not include the stack pointer, as it should not change over --- the lifetime of a single routine. (Always pop what you pushed.) --- Ditto the I flag. (always enable interrupts after disabling them.) --- We do not include the B flag, because for us, BRK is game over, man. - --- One of these should never refer to the program code. We can only police --- this up to a point. - -data StorageType = Byte - | Word - | Vector - | Table StorageType DataValue - deriving (Show, Ord, Eq) - -data StorageLocation = A - | Y - | X - | FlagN - | FlagV - | FlagD - | FlagZ - | FlagC - | AllFlags -- for PHP - | Immediate DataValue - | Indirect StorageLocation - | Indexed StorageLocation StorageLocation - | IndirectIndexed StorageLocation StorageLocation - | NamedLocation (Maybe StorageType) LocationName - | LowByteOf StorageLocation - | HighByteOf StorageLocation - deriving (Show, Ord, Eq) - --- -- -- -- program model -- -- -- -- - -data Decl = Assign LocationName StorageType Address -- .alias - | Reserve LocationName StorageType [DataValue] -- .word, .byte - | External RoutineName Address - deriving (Show, Ord, Eq) - -type RoutineName = String - -data Branch = BCC | BCS | BEQ | BMI | BNE | BPL | BVC | BVS - deriving (Show, Ord, Eq) - -data WithInstruction = SEI - | PUSH StorageLocation - deriving (Show, Ord, Eq) - -data Block = Block [Decl] [Instruction] - deriving (Show, Ord, Eq) - -data Instruction = COPY StorageLocation StorageLocation - | CMP StorageLocation StorageLocation - | ADD StorageLocation StorageLocation - | AND StorageLocation StorageLocation - | SUB StorageLocation StorageLocation - | OR StorageLocation StorageLocation - | XOR StorageLocation StorageLocation - | SHL StorageLocation StorageLocation - | SHR StorageLocation StorageLocation - | BIT StorageLocation - | JSR RoutineName - -- | JSRVECTOR StorageLocation - | JMPVECTOR StorageLocation - | IF InternalID Branch Block Block - | REPEAT InternalID Branch Block - | DELTA StorageLocation DataValue - | WITH WithInstruction Block - | COPYROUTINE RoutineName StorageLocation - | NOP - deriving (Show, Ord, Eq) - -data Routine = Routine RoutineName [StorageLocation] Block - deriving (Show, Ord, Eq) - -data Program = Program [Decl] [Routine] - deriving (Show, Ord, Eq) - --- -- -- accessors and helpers -- -- -- - --- bit of a hack to deepseq the eval -programSummary p@(Program decls routs) = - show ((length $ show p) < 99999) - -getRoutineName (Routine name _ _) = name - -getDeclLocationName (Assign name _ _) = name -getDeclLocationName (Reserve name _ _) = name - -getDeclLocationType (Assign _ t _) = t -getDeclLocationType (Reserve _ t _) = t - -isLocationDecl (Assign _ _ _) = True -isLocationDecl (Reserve _ _ _) = True -isLocationDecl _ = False - -isInitializedDecl (Assign _ _ _) = False -isInitializedDecl (Reserve _ _ (v:vs)) = True -isInitializedDecl (Reserve _ _ []) = False -isInitializedDecl _ = False - -declaredLocationNames (Program decls _) = - map (getDeclLocationName) (filter (isLocationDecl) decls) - -locationDeclared locName p = - elem locName $ declaredLocationNames p - -getDeclRoutineName (External name _) = name - -isRoutineDecl (External _ _) = True -isRoutineDecl _ = False - -declaredRoutineNames (Program decls routines) = - map (getRoutineName) routines ++ - map (getDeclRoutineName) (filter (isRoutineDecl) decls) - -routineDeclared routName p = - elem routName (declaredRoutineNames p) - --- - -mapInstrs :: (Instruction -> Instruction) -> [Instruction] -> [Instruction] -mapInstrs = map - -mapBlock :: (Instruction -> Instruction) -> Block -> Block -mapBlock f (Block decls instrs) = - Block decls (mapInstrs f instrs) - -mapRoutine :: (Instruction -> Instruction) -> Routine -> Routine -mapRoutine f (Routine name outputs block) = - Routine name outputs (mapBlock f block) - -mapRoutines :: (Instruction -> Instruction) -> [Routine] -> [Routine] -mapRoutines f [] = [] -mapRoutines f (rout:routs) = - (mapRoutine f rout):(mapRoutines f routs) - -mapProgramRoutines :: (Instruction -> Instruction) -> Program -> Program -mapProgramRoutines f (Program decls routs) = - Program decls $ mapRoutines f routs - --- - -foldInstrs :: (Instruction -> a -> a) -> a -> [Instruction] -> a -foldInstrs = foldr - -foldBlock :: (Instruction -> a -> a) -> a -> Block -> a -foldBlock f a (Block decls instrs) = - foldInstrs f a instrs - -foldRoutine :: (Instruction -> a -> a) -> a -> Routine -> a -foldRoutine f a (Routine name outputs instrs) = - foldBlock f a instrs - -foldRoutines :: (Instruction -> a -> a) -> a -> [Routine] -> a -foldRoutines f a [] = a -foldRoutines f a (rout:routs) = - let - z = foldRoutine f a rout - in - foldRoutines f z routs - -foldProgramRoutines :: (Instruction -> a -> a) -> a -> Program -> a -foldProgramRoutines f a (Program decls routs) = - foldRoutines f a routs - -foldDecls :: (Decl -> a -> a) -> a -> [Decl] -> a -foldDecls = foldr - -foldProgramDecls :: (Decl -> a -> a) -> a -> Program -> a -foldProgramDecls f a (Program decls routs) = - foldDecls f a decls - --- - -lookupDecl (Program decls _) name = - lookupDecl' (filter (isLocationDecl) decls) name - -lookupDecl' [] _ = Nothing -lookupDecl' (decl:decls) name - | (getDeclLocationName decl) == name = Just decl - | otherwise = lookupDecl' decls name - -lookupRoutine (Program _ routines) name = - lookupRoutine' routines name - -lookupRoutine' [] _ = Nothing -lookupRoutine' (rout@(Routine rname _ _):routs) name - | rname == name = Just rout - | otherwise = lookupRoutine' routs name diff --git a/src/SixtyPical/Parser.hs b/src/SixtyPical/Parser.hs deleted file mode 100644 index 960f13c..0000000 --- a/src/SixtyPical/Parser.hs +++ /dev/null @@ -1,685 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Parser (parseProgram) where - -import Numeric (readHex) -import Data.Char (ord) - -import Text.ParserCombinators.Parsec - -import SixtyPical.Model - -{- - -Toplevel ::= {Decl} {Routine}. -Decl ::= "reserve" StorageType LocationName [":" InitialValue] - | "assign" StorageType LocationName Literal - | "external" RoutineName Address. -InitialValue ::= Literal | StringLiteral | "(" {Literal} ")". -StorageType ::= ("byte" | "word" | "vector") ["[" Literal "]"]. -Routine ::= "routine" RoutineName ["outputs" "(" {LocationName} ")"] Block. -Block ::= "{" {Decl} {Command} "}". -Command ::= "if" Branch Block "else" Block - | "lda" (LocationName | Immediate) - | "ldx" (LocationName | Immediate) - | "ldy" (LocationName | Immediate) - | "txa" | "tax" | "tya" | "tay" - | "cmp" (LocationName | Immediate) - | "cpx" (LocationName | Immediate) - | "cpy" (LocationName | Immediate) - | "inx" | "iny" | "dex" | "dey" | "inc" Location | "dec" Location - | "clc" | "cld" | "clv" | "sec" | "sed" - | "with ("sei" | "pha" | "php") Block - | "jmp" LocationName - | "jsr" RoutineName - | "nop". -Branch ::= "bcc" | "bcs" | "beq" | "bmi" | "bne" | "bpl" | "bvc" | "bvs". - --} - -nspaces :: Parser () -nspaces = do - many (space <|> try block_comment <|> line_comment) - return () - -block_comment :: Parser Char -block_comment = do - string "/*" - manyTill anyChar (try (string "*/")) - return ' ' - -line_comment :: Parser Char -line_comment = do - string "//" - manyTill anyChar (char '\n') - return ' ' - -toplevel :: Parser Program -toplevel = do - nspaces - decls <- many decl - routines <- many routine - return $ Program decls routines - -decl :: Parser Decl -decl = try assign <|> try reserve <|> external - -reserve :: Parser Decl -reserve = do - string "reserve" - nspaces - sz <- storage_type - name <- location_name - value <- option [] (do{ string ":"; - nspaces; - x <- initial_value; - return x }) - return $ Reserve name sz value - -assign :: Parser Decl -assign = do - string "assign" - nspaces - sz <- storage_type - name <- location_name - addr <- literal_address - return $ Assign name sz addr - -external :: Parser Decl -external = do - string "external" - nspaces - name <- routineName - addr <- literal_address - return $ External name addr - -storage :: String -> StorageType -> Parser StorageType -storage s t = do - string s - nspaces - return t - -table :: StorageType -> Parser StorageType -table typ = do - string "[" - nspaces - size <- literal_data_value - string "]" - nspaces - return $ Table typ size - -storage_type :: Parser StorageType -storage_type = do - typ <- (storage "byte" Byte) <|> (storage "word" Word) <|> - (storage "vector" Vector) - option typ (table typ) - -initial_value :: Parser [DataValue] -initial_value = - data_value_list <|> string_literal <|> single_literal_data_value - where - single_literal_data_value = do - a <- literal_data_value - return [a] - -data_value_list = do - string "(" - nspaces - a <- many literal_data_value - string ")" - nspaces - return a - --- -- -- - -routine :: Parser Routine -routine = do - string "routine" - nspaces - name <- routineName - outputs <- (try routine_outputs <|> return []) - instrs <- block - return (Routine name outputs instrs) - -routine_outputs :: Parser [StorageLocation] -routine_outputs = do - string "outputs" - nspaces - string "(" - nspaces - locations <- many location - string ")" - nspaces - return locations - -location = (try explicit_register <|> named_location) - -block :: Parser Block -block = do - string "{" - nspaces - ds <- many decl - cs <- many command - string "}" - nspaces - return (Block ds cs) - --- -- -- -- -- -- commands -- -- -- -- -- - -index :: Parser StorageLocation -index = do - string "," - nspaces - c <- (string "x" <|> string "y") - nspaces - return $ case c of - "x" -> X - "y" -> Y - -data AddressingModality = Directly LocationName - | HighBytely LocationName - | LowBytely LocationName - | Indirectly LocationName - | Immediately DataValue - | Implicitly StorageLocation - deriving (Ord, Show, Eq) - -low_byte_of_absolute :: Parser AddressingModality -low_byte_of_absolute = do - string "<" - l <- location_name - return $ LowBytely l - -high_byte_of_absolute :: Parser AddressingModality -high_byte_of_absolute = do - string ">" - l <- location_name - return $ HighBytely l - -indirect_location :: Parser AddressingModality -indirect_location = do - string "(" - nspaces - l <- location_name - string ")" - nspaces - return $ Indirectly l - -direct_location :: Parser AddressingModality -direct_location = do - l <- location_name - return $ Directly l - -explicit_location :: String -> StorageLocation -> Parser StorageLocation -explicit_location s l = do - string s - nspaces - return $ l - -explicit_register :: Parser StorageLocation -explicit_register = ((try $ explicit_location ".a" A) <|> - (try $ explicit_location ".x" X) <|> - (try $ explicit_location ".y" Y) <|> - (try $ explicit_location ".n" FlagN) <|> - (try $ explicit_location ".v" FlagV) <|> - (try $ explicit_location ".d" FlagD) <|> - (try $ explicit_location ".z" FlagZ) <|> - (explicit_location ".c" FlagC)) - -register_location :: Parser AddressingModality -register_location = do - z <- explicit_register - nspaces - return $ Implicitly z -- ironic? - -immediate :: Parser AddressingModality -immediate = do - string "#" - v <- literal_data_value - return $ Immediately v - -addressing_mode :: String -> (AddressingModality -> [StorageLocation] -> Instruction) -> Parser Instruction -addressing_mode opcode f = do - string opcode - nspaces - d <- ((try immediate) <|> (try high_byte_of_absolute) <|> - (try low_byte_of_absolute) <|> (try indirect_location) <|> - (try register_location) <|> (try direct_location)) - indexes <- many index - return $ f d indexes - -command :: Parser Instruction -command = (try lda) <|> - (try ldx) <|> (try ldy) <|> - (try sta) <|> (try stx) <|> (try sty) <|> - (try txa) <|> (try tax) <|> (try tya) <|> (try tay) <|> - (try cmp) <|> (try cpx) <|> (try cpy) <|> - (try inx) <|> (try iny) <|> (try dex) <|> (try dey) <|> - (try inc) <|> (try dec) <|> - (try clc) <|> (try cld) <|> (try clv) <|> (try sec) <|> (try sed) <|> - (try adc) <|> (try SixtyPical.Parser.and) <|> - (try sbc) <|> (try ora) <|> - (try asl) <|> (try bit) <|> (try eor) <|> (try lsr) <|> - (try rol) <|> (try ror) <|> - (try jmp) <|> (try jsr) <|> - (try with_block) <|> - (try copy_routine_statement) <|> - (try copy_general_statement) <|> - if_statement <|> repeat_statement <|> nop - -nop :: Parser Instruction -nop = do - string "nop" - nspaces - return NOP - -asl :: Parser Instruction -asl = do - addressing_mode "asl" gen - where - gen (Implicitly A) [] = SHL A (Immediate 0) - gen (Directly l) [] = SHL (NamedLocation Nothing l) (Immediate 0) - -lsr :: Parser Instruction -lsr = do - addressing_mode "lsr" gen - where - gen (Implicitly A) [] = SHR A (Immediate 0) - gen (Directly l) [] = SHR (NamedLocation Nothing l) (Immediate 0) - -rol :: Parser Instruction -rol = do - addressing_mode "rol" gen - where - gen (Implicitly A) [] = SHL A FlagC - gen (Directly l) [] = SHL (NamedLocation Nothing l) FlagC - -ror :: Parser Instruction -ror = do - addressing_mode "ror" gen - where - gen (Implicitly A) [] = SHR A FlagC - gen (Directly l) [] = SHR (NamedLocation Nothing l) FlagC - -clc :: Parser Instruction -clc = do - string "clc" - nspaces - return $ COPY (Immediate 0) FlagC - -cld :: Parser Instruction -cld = do - string "cld" - nspaces - return $ COPY (Immediate 0) FlagD - -clv :: Parser Instruction -clv = do - string "clv" - nspaces - return $ COPY (Immediate 0) FlagV - -sec :: Parser Instruction -sec = do - string "sec" - nspaces - return $ COPY (Immediate 1) FlagC - -sed :: Parser Instruction -sed = do - string "sed" - nspaces - return $ COPY (Immediate 1) FlagD - -inx :: Parser Instruction -inx = do - string "inx" - nspaces - return $ DELTA X 1 - -iny :: Parser Instruction -iny = do - string "iny" - nspaces - return $ DELTA Y 1 - -dex :: Parser Instruction -dex = do - string "dex" - nspaces - return $ DELTA X (-1) - -dey :: Parser Instruction -dey = do - string "dey" - nspaces - return $ DELTA Y (-1) - -inc :: Parser Instruction -inc = do - string "inc" - nspaces - l <- named_location - return (DELTA l 1) - -dec :: Parser Instruction -dec = do - string "dec" - nspaces - l <- named_location - return (DELTA l (-1)) - -cmp :: Parser Instruction -cmp = do - addressing_mode "cmp" gen - where - gen (Immediately v) [] = CMP A (Immediate v) - gen (LowBytely l) [] = CMP A (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = CMP A (HighByteOf (NamedLocation Nothing l)) - gen (Directly l) [] = CMP A (NamedLocation Nothing l) - -cpx :: Parser Instruction -cpx = do - addressing_mode "cpx" gen - where - gen (Immediately v) [] = CMP X (Immediate v) - gen (Directly l) [] = CMP X (NamedLocation Nothing l) - -cpy :: Parser Instruction -cpy = do - addressing_mode "cpy" gen - where - gen (Immediately v) [] = CMP Y (Immediate v) - gen (Directly l) [] = CMP Y (NamedLocation Nothing l) - -adc :: Parser Instruction -adc = do - addressing_mode "adc" gen - where - gen (Immediately v) [] = ADD A (Immediate v) - gen (LowBytely l) [] = ADD A (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = ADD A (HighByteOf (NamedLocation Nothing l)) - gen (Directly l) [] = ADD A (NamedLocation Nothing l) - -sbc :: Parser Instruction -sbc = do - addressing_mode "sbc" gen - where - gen (Immediately v) [] = SUB A (Immediate v) - gen (LowBytely l) [] = SUB A (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = SUB A (HighByteOf (NamedLocation Nothing l)) - gen (Directly l) [] = SUB A (NamedLocation Nothing l) - -and :: Parser Instruction -and = do - addressing_mode "and" gen - where - gen (Immediately v) [] = AND A (Immediate v) - gen (Directly l) [] = AND A (NamedLocation Nothing l) - -ora :: Parser Instruction -ora = do - addressing_mode "ora" gen - where - gen (Immediately v) [] = OR A (Immediate v) - gen (Directly l) [] = OR A (NamedLocation Nothing l) - -eor :: Parser Instruction -eor = do - addressing_mode "eor" gen - where - gen (Immediately v) [] = XOR A (Immediate v) - gen (Directly l) [] = XOR A (NamedLocation Nothing l) - -bit :: Parser Instruction -bit = do - addressing_mode "bit" gen - where - gen (Directly l) [] = BIT (NamedLocation Nothing l) - -lda :: Parser Instruction -lda = do - addressing_mode "lda" gen - where - gen (Immediately v) [] = COPY (Immediate v) A - gen (LowBytely l) [] = COPY (LowByteOf (NamedLocation Nothing l)) A - gen (HighBytely l) [] = COPY (HighByteOf (NamedLocation Nothing l)) A - gen (Directly l) [] = COPY (NamedLocation Nothing l) A - gen (Directly l) [reg] = COPY (Indexed (NamedLocation Nothing l) reg) A - gen (Indirectly l) [reg] = COPY (IndirectIndexed (NamedLocation Nothing l) reg) A - gen x y = error ("Can't parse lda " ++ (show x) ++ (show y)) - -ldx :: Parser Instruction -ldx = do - addressing_mode "ldx" gen - where - gen (Immediately v) [] = COPY (Immediate v) X - gen (Directly l) [] = COPY (NamedLocation Nothing l) X - -ldy :: Parser Instruction -ldy = do - addressing_mode "ldy" gen - where - gen (Immediately v) [] = COPY (Immediate v) Y - gen (Directly l) [] = COPY (NamedLocation Nothing l) Y - -sta :: Parser Instruction -sta = do - addressing_mode "sta" gen - where - gen (LowBytely l) [] = COPY A (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = COPY A (HighByteOf (NamedLocation Nothing l)) - gen (Directly l) [] = COPY A (NamedLocation Nothing l) - gen (Directly l) [reg] = COPY A (Indexed (NamedLocation Nothing l) reg) - gen (Indirectly l) [reg] = COPY A (IndirectIndexed (NamedLocation Nothing l) reg) - -stx :: Parser Instruction -stx = do - addressing_mode "stx" gen - where - gen (Directly l) [] = COPY X (NamedLocation Nothing l) - gen (LowBytely l) [] = COPY X (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = COPY X (HighByteOf (NamedLocation Nothing l)) - -sty :: Parser Instruction -sty = do - addressing_mode "sty" gen - where - gen (Directly l) [] = COPY Y (NamedLocation Nothing l) - gen (LowBytely l) [] = COPY Y (LowByteOf (NamedLocation Nothing l)) - gen (HighBytely l) [] = COPY Y (HighByteOf (NamedLocation Nothing l)) - -txa :: Parser Instruction -txa = do - string "txa" - nspaces - return (COPY X A) - -tax :: Parser Instruction -tax = do - string "tax" - nspaces - return (COPY A X) - -tya :: Parser Instruction -tya = do - string "tya" - nspaces - return (COPY Y A) - -tay :: Parser Instruction -tay = do - string "tay" - nspaces - return (COPY A Y) - -with_block :: Parser Instruction -with_block = do - string "with" - nspaces - instr <- (try sei) <|> (try pha) <|> php - blk <- block - return (WITH instr blk) - - -sei :: Parser WithInstruction -sei = do - string "sei" - nspaces - return SEI - -pha :: Parser WithInstruction -pha = do - string "pha" - nspaces - return (PUSH A) - -php :: Parser WithInstruction -php = do - string "php" - nspaces - return (PUSH AllFlags) - -jmp :: Parser Instruction -jmp = do - string "jmp" - nspaces - string "(" - nspaces - l <- named_location - string ")" - nspaces - return $ JMPVECTOR l - -jsr :: Parser Instruction -jsr = do - string "jsr" - nspaces - l <- routineName - return $ JSR l - -if_statement :: Parser Instruction -if_statement = do - string "if" - nspaces - brch <- branch - b1 <- block - string "else" - nspaces - b2 <- block - return (IF 0 brch b1 b2) - -repeat_statement :: Parser Instruction -repeat_statement = do - string "repeat" - nspaces - brch <- branch - blk <- block - return (REPEAT 0 brch blk) - -copy_general_statement :: Parser Instruction -copy_general_statement = do - string "copy" - nspaces - - src <- (immediate <|> - register_location <|> - low_byte_of_absolute <|> high_byte_of_absolute <|> direct_location) - srcI <- many index - lhs <- return $ case (src, srcI) of - ((Implicitly reg), []) -> reg - ((Immediately s), []) -> (Immediate s) - ((Directly s), []) -> (NamedLocation Nothing s) - ((Directly s), [reg]) -> (Indexed (NamedLocation Nothing s) reg) - ((LowBytely s), [reg]) -> (LowByteOf (Indexed (NamedLocation Nothing s) reg)) - ((HighBytely s), [reg]) -> (HighByteOf (Indexed (NamedLocation Nothing s) reg)) - - dst <- (low_byte_of_absolute <|> high_byte_of_absolute <|> direct_location) - dstI <- many index - rhs <- return $ case (dst, dstI) of - ((Directly d), []) -> (NamedLocation Nothing d) - ((Directly d), [reg]) -> (Indexed (NamedLocation Nothing d) reg) - ((LowBytely d), [reg]) -> (LowByteOf (Indexed (NamedLocation Nothing d) reg)) - ((HighBytely d), [reg]) -> (HighByteOf (Indexed (NamedLocation Nothing d) reg)) - - return $ COPY lhs rhs - -copy_routine_statement :: Parser Instruction -copy_routine_statement = do - string "copy" - nspaces - string "routine" - nspaces - src <- routineName - string "to" - nspaces - dst <- location_name - dstI <- many index - return $ case dstI of - [] -> COPYROUTINE src (NamedLocation Nothing dst) - [reg] -> COPYROUTINE src (Indexed (NamedLocation Nothing dst) reg) - -branch :: Parser Branch -branch = try (b "bcc" BCC) <|> try (b "bcs" BCS) <|> try (b "beq" BEQ) <|> - try (b "bmi" BMI) <|> try (b "bne" BNE) <|> try (b "bpl" BPL) <|> - try (b "bvc" BVC) <|> (b "bvs" BVS) - -b :: String -> Branch -> Parser Branch -b s k = do - string s - nspaces - return k - -routineName :: Parser String -routineName = do - c <- letter - cs <- many (alphaNum <|> char '_') - nspaces - return (c:cs) - -location_name :: Parser String -location_name = do - c <- letter - cs <- many (alphaNum <|> char '_') - nspaces - return (c:cs) - -named_location :: Parser StorageLocation -named_location = do - name <- location_name - return (NamedLocation Nothing name) - -literal_address = do - a <- literal_value - return (a :: Address) - -literal_data_value = do - a <- literal_value - return (a :: DataValue) - -literal_value = hex_literal <|> decimal_literal - -hex_literal :: Parser Int -hex_literal = do - char '$' - digits <- many hexDigit - nspaces - let ((d, _):_) = readHex digits - return d - -decimal_literal :: Parser Int -decimal_literal = do - digits <- many1 digit - nspaces - return $ read digits - -string_literal :: Parser [DataValue] -string_literal = do - char '"' - s <- manyTill anyChar (char '"') - nspaces - return $ map (\c -> ord c) s - --- -- -- driver -- -- -- - -parseProgram = parse toplevel "" diff --git a/src/SixtyPical/Transformer.hs b/src/SixtyPical/Transformer.hs deleted file mode 100644 index eb387dd..0000000 --- a/src/SixtyPical/Transformer.hs +++ /dev/null @@ -1,279 +0,0 @@ --- encoding: UTF-8 - -module SixtyPical.Transformer ( - numberProgramLoops, fillOutNamedLocationTypes, - renameBlockDecls, liftBlockDecls - ) where - -import SixtyPical.Model - --- -- -- -- -- -- - --- in the following "number" means "assign a unique ID to" and "loop" --- means "REPEAT or IF" (because i'm in such a good mood) - -numberProgramLoops :: Program -> Program -numberProgramLoops (Program decls routines) = - let - (routines', _) = numberRoutinesLoops routines 0 - in - (Program decls routines') - -numberRoutinesLoops :: [Routine] -> InternalID -> ([Routine], InternalID) -numberRoutinesLoops [] iid = ([], iid) -numberRoutinesLoops (routine:routines) iid = - let - (routine', iid') = numberRoutineLoops routine iid - (routines', iid'') = numberRoutinesLoops routines iid' - in - ((routine':routines'), iid'') - -numberRoutineLoops :: Routine -> InternalID -> (Routine, InternalID) -numberRoutineLoops (Routine name outputs block) iid = - let - (block', iid') = numberBlockLoops block iid - in - ((Routine name outputs block'), iid') - -numberBlockLoops :: Block -> InternalID -> (Block, InternalID) -numberBlockLoops block iid = - let - (Block decls instrs) = block - (instrs', iid') = numberInstrsLoops instrs iid - block' = Block decls instrs' - in - (block', iid') - -numberInstrsLoops :: [Instruction] -> InternalID -> ([Instruction], InternalID) -numberInstrsLoops [] iid = ([], iid) -numberInstrsLoops (instr:instrs) iid = - let - (instr', iid') = numberInstruction instr iid - (instrs', iid'') = numberInstrsLoops instrs iid' - in - ((instr':instrs'), iid'') - -numberInstruction :: Instruction -> InternalID -> (Instruction, InternalID) -numberInstruction (IF _ branch b1 b2) iid = - let - (b1', iid') = numberBlockLoops b1 iid - (b2', iid'') = numberBlockLoops b2 iid' - newIid = iid'' + 1 - newInstr = IF newIid branch b1' b2' - in - (newInstr, newIid) -numberInstruction (REPEAT _ branch blk) iid = - let - (blk', iid') = numberBlockLoops blk iid - newIid = iid' + 1 - newInstr = REPEAT newIid branch blk' - in - (newInstr, newIid) -numberInstruction i iid = (i, iid) - --- -- -- - -fillOutNamedLocationTypes p@(Program decls routines) = - mapProgramRoutines (xform) p - where - xform (COPY src dest) = - typeMatch src dest (COPY) - xform (CMP dest other) = - typeMatch dest other (CMP) - xform (ADD dest other) = - typeMatch dest other (ADD) - xform (AND dest other) = - typeMatch dest other (AND) - xform (SUB dest other) = - typeMatch dest other (SUB) - xform (OR dest other) = - typeMatch dest other (OR) - xform (JMPVECTOR dest) = - case (resolve dest) of - d@(NamedLocation (Just Vector) _) -> - JMPVECTOR d - _ -> - error ("jmp to non-vector '" ++ (show dest) ++ "'") - xform (IF iid branch b1 b2) = - IF iid branch (mapBlock xform b1) (mapBlock xform b2) - xform (REPEAT iid branch blk) = - REPEAT iid branch (mapBlock xform blk) - xform (DELTA dest val) = - DELTA (resolve dest) val - xform (WITH SEI blk) = - WITH SEI (mapBlock xform blk) - xform (WITH (PUSH val) blk) = - WITH (PUSH (resolve val)) (mapBlock xform blk) - xform (COPYROUTINE name dest) = - COPYROUTINE name (resolve dest) - xform other = - other - getType (NamedLocation (Just t) _) = t - getType A = Byte - getType X = Byte - getType Y = Byte - getType (Immediate x) = -- TODO! allow promotion! - if x > 255 then Word else Byte - getType (Indexed t _) = - getType t - getType _ = Byte - typeMatch x y constructor = - let - rx = resolve x - ry = resolve y - typeRx = getType rx - typeRy = getType ry - in - if - typeRx == typeRy - then - constructor rx ry - else - case (typeRx, typeRy) of - (Byte, (Table Byte _)) -> constructor rx ry - ((Table Byte _), Byte) -> constructor rx ry - (Word, (Table Word _)) -> constructor rx ry - ((Table Word _), Word) -> constructor rx ry - (Vector, (Table Vector _)) -> constructor rx ry - ((Table Vector _), Vector) -> constructor rx ry - _ -> error ("incompatible types '" ++ (show typeRx) ++ - "' and '" ++ (show typeRy) ++ "'" ++ - " [" ++ (show rx) ++ "," ++ (show ry) ++ "]") - resolve (NamedLocation Nothing name) = - case lookupDecl p name of - Just decl -> - (NamedLocation (Just $ getDeclLocationType decl) name) - _ -> - error ("undeclared location '" ++ name ++ "'") - resolve (Indirect loc) = - (Indirect (resolve loc)) - resolve (Indexed loc reg) = - (Indexed (resolve loc) (resolve reg)) - resolve (IndirectIndexed loc reg) = - (IndirectIndexed (resolve loc) (resolve reg)) - resolve (LowByteOf loc) = - (LowByteOf (resolve loc)) - resolve (HighByteOf loc) = - (HighByteOf (resolve loc)) - resolve other = - other - --- -- -- -- -- - --- TODO: look at all blocks, not just routine's blocks -renameBlockDecls (Program decls routines) = - let - routines' = renameRoutineDecls 1 routines - in - Program decls routines' - -renameRoutineDecls id [] = [] -renameRoutineDecls id ((Routine name outputs block):routs) = - let - (Block decls _) = block - (id', block') = foldDeclsRenaming decls id block - rest = renameRoutineDecls id' routs - in - ((Routine name outputs block'):rest) - -foldDeclsRenaming [] id block = (id, block) -foldDeclsRenaming ((Reserve name typ []):decls) id block = - let - newName = "_temp_" ++ (show id) - id' = id + 1 - block' = mapBlockNames name newName block - block'' = substDeclName name newName block' - in - foldDeclsRenaming decls id' block'' -foldDeclsRenaming ((Reserve name typ _):decls) id block = - error ("block-level '" ++ name ++ "' cannot supply initial value") - --- this is kind of horrible. that we do it this way, i mean -substDeclName n1 n2 (Block decls instrs) = - Block (map (s) decls) instrs - where - s d@(Reserve name typ []) - | name == n1 = (Reserve n2 typ []) - | otherwise = d - - -mapBlockNames n1 n2 (Block decls instrs) = - (Block decls $ mapInstrsNames n1 n2 instrs) - -mapInstrsNames n1 n2 instrs = - map (mapInstrName n1 n2) instrs - -mapInstrName n1 n2 (COPY sl1 sl2) = - COPY (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (CMP sl1 sl2) = - CMP (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (ADD sl1 sl2) = - ADD (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (AND sl1 sl2) = - AND (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (SUB sl1 sl2) = - SUB (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (OR sl1 sl2) = - OR (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (XOR sl1 sl2) = - XOR (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (SHL sl1 sl2) = - SHL (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (SHR sl1 sl2) = - SHR (mapStorageLocationName n1 n2 sl1) (mapStorageLocationName n1 n2 sl2) -mapInstrName n1 n2 (BIT sl1) = - BIT (mapStorageLocationName n1 n2 sl1) -mapInstrName n1 n2 (JMPVECTOR sl1) = - JMPVECTOR (mapStorageLocationName n1 n2 sl1) -mapInstrName n1 n2 (DELTA sl1 v) = - DELTA (mapStorageLocationName n1 n2 sl1) v - -mapInstrName n1 n2 (IF id branch b1 b2) = - IF id branch (mapBlockNames n1 n2 b1) (mapBlockNames n1 n2 b2) - -mapInstrName n1 n2 (REPEAT id branch b1) = - REPEAT id branch (mapBlockNames n1 n2 b1) - -mapInstrName n1 n2 (WITH instr b1) = - WITH instr (mapBlockNames n1 n2 b1) - -{- - | COPYROUTINE RoutineName StorageLocation --} - -mapInstrName n1 n2 other = - other - -mapStorageLocationName n1 n2 (Indirect sl) = - Indirect $ mapStorageLocationName n1 n2 sl -mapStorageLocationName n1 n2 (Indexed sl1 sl2) = - Indexed (mapStorageLocationName n1 n2 sl1) sl2 -mapStorageLocationName n1 n2 (IndirectIndexed sl1 sl2) = - IndirectIndexed (mapStorageLocationName n1 n2 sl1) sl2 - -mapStorageLocationName n1 n2 sl@(NamedLocation typ name) - | name == n1 = NamedLocation typ n2 - | otherwise = sl - -mapStorageLocationName n1 n2 (LowByteOf sl) = - LowByteOf $ mapStorageLocationName n1 n2 sl - -mapStorageLocationName n1 n2 (HighByteOf sl) = - HighByteOf $ mapStorageLocationName n1 n2 sl - -mapStorageLocationName n1 n2 other = - other - --- -- -- -- - --- TODO: look at all blocks, not just routine's blocks -liftBlockDecls (Program decls routines) = - let - liftedDecls = foldr getRoutinesBlockDecls [] routines - in - Program (decls ++ liftedDecls) routines - where - getRoutinesBlockDecls (Routine name outputs block) a = - a ++ (getBlockDecls block) - getBlockDecls (Block decls instrs) = - decls diff --git a/src/sixtypical/__init__.py b/src/sixtypical/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/sixtypical/analyzer.py b/src/sixtypical/analyzer.py new file mode 100644 index 0000000..71e3a1b --- /dev/null +++ b/src/sixtypical/analyzer.py @@ -0,0 +1,159 @@ +# encoding: UTF-8 + +import sys + +from sixtypical.ast import Program, Defn, Routine, Block, Instr +from sixtypical.parser import ConstantRef, LocationRef + + +UNINITIALIZED = 'UNINITIALIZED' +INITIALIZED = 'INITIALIZED' + + +class StaticAnalysisError(ValueError): + pass + + +class UninitializedAccessError(StaticAnalysisError): + pass + + +class IllegalWriteError(StaticAnalysisError): + pass + + +class UsageClashError(StaticAnalysisError): + pass + + +class Context(): + def __init__(self, inputs, outputs, trashes): + self._store = {} + self._writeables = set() + + for ref in inputs: + self._store.setdefault(ref.name, INITIALIZED) + output_names = set() + for ref in outputs: + output_names.add(ref.name) + self._store.setdefault(ref.name, UNINITIALIZED) + self._writeables.add(ref.name) + for ref in trashes: + if ref.name in output_names: + raise UsageClashError(ref.name) + self._store.setdefault(ref.name, UNINITIALIZED) + self._writeables.add(ref.name) + + def assertInitialized(self, *refs): + for ref in refs: + if isinstance(ref, ConstantRef): + pass + elif isinstance(ref, LocationRef): + if self.get(ref) != INITIALIZED: + raise UninitializedAccessError(ref.name) + else: + raise ValueError(ref) + + def assertWriteable(self, *refs): + for ref in refs: + if ref.name not in self._writeables: + raise IllegalWriteError(ref.name) + + def setInitialized(self, *refs): + for ref in refs: + self.set(ref, INITIALIZED) + + def setUninitialized(self, *refs): + for ref in refs: + self.set(ref, UNINITIALIZED) + + def get(self, ref): + if isinstance(ref, ConstantRef): + return INITIALIZED + elif isinstance(ref, LocationRef): + if ref.name not in self._store: + return UNINITIALIZED + return self._store[ref.name] + else: + raise ValueError(ref) + + def set(self, ref, value): + assert isinstance(ref, LocationRef) + self._store[ref.name] = value + + +def analyze_program(program): + assert isinstance(program, Program) + routines = {r.name: r for r in program.routines} + for routine in program.routines: + analyze_routine(routine, routines) + + +def analyze_routine(routine, routines): + assert isinstance(routine, Routine) + context = Context(routine.inputs, routine.outputs, routine.trashes) + analyze_block(routine.block, context, routines) + for ref in routine.outputs: + context.assertInitialized(ref) + + +def analyze_block(block, context, routines): + assert isinstance(block, Block) + for i in block.instrs: + analyze_instr(i, context, routines) + + +def analyze_instr(instr, context, routines): + assert isinstance(instr, Instr) + opcode = instr.opcode + dest = instr.dest + src = instr.src + + if opcode == 'ld': + context.assertInitialized(src) + context.assertWriteable(dest, LocationRef('z'), LocationRef('n')) + context.setInitialized(dest, LocationRef('z'), LocationRef('n')) + elif opcode == 'st': + context.assertInitialized(src) + context.assertWriteable(dest) + context.setInitialized(dest) + elif opcode in ('add', 'sub'): + context.assertInitialized(src, dest, LocationRef('c')) + context.assertWriteable(dest, + LocationRef('z'), LocationRef('n'), + LocationRef('c'), LocationRef('v'), + ) + context.setInitialized(dest, + LocationRef('z'), LocationRef('n'), + LocationRef('c'), LocationRef('v'), + ) + elif opcode in ('inc', 'dec'): + context.assertInitialized(dest) + context.assertWriteable(dest, LocationRef('z'), LocationRef('n')) + context.setInitialized(dest, LocationRef('z'), LocationRef('n')) + elif opcode == 'cmp': + context.assertInitialized(src, dest) + context.assertWriteable(LocationRef('z'), LocationRef('n'), LocationRef('c')) + context.setInitialized(LocationRef('z'), LocationRef('n'), LocationRef('c')) + elif opcode in ('and', 'or', 'xor'): + context.assertInitialized(sec, dest) + context.assertWriteable(dest, LocationRef('z'), LocationRef('n')) + context.setInitialized(dest, LocationRef('z'), LocationRef('n')) + elif opcode in ('shl', 'shr'): + context.assertInitialized(dest) + context.assertWriteable(dest, LocationRef('z'), LocationRef('n'), LocationRef('c')) + context.setInitialized(dest, LocationRef('z'), LocationRef('n'), LocationRef('c')) + elif opcode == 'call': + routine = routines[instr.name] + for ref in routine.inputs: + context.assertInitialized(ref) + for ref in routine.outputs: + context.assertWriteable(ref) + context.setInitialized(ref) + for ref in routine.trashes: + context.assertWriteable(ref) + context.setUninitialized(ref) + elif opcode == 'if': + pass + else: + raise NotImplementedError diff --git a/src/sixtypical/ast.py b/src/sixtypical/ast.py new file mode 100644 index 0000000..86b614b --- /dev/null +++ b/src/sixtypical/ast.py @@ -0,0 +1,37 @@ +# encoding: UTF-8 + +class AST(object): + def __init__(self, **kwargs): + self.attrs = kwargs + + def __repr__(self): + return "%s(%r)" % (self.__class__.__name__, self.attrs) + + def __getattr__(self, name): + if name in self.attrs: + return self.attrs[name] + raise AttributeError(name) + + +class Program(AST): + pass + + +class Defn(AST): + pass + + +class Routine(AST): + pass + + +class DecLoc(AST): + pass + + +class Block(AST): + pass + + +class Instr(AST): + pass diff --git a/src/sixtypical/evaluator.py b/src/sixtypical/evaluator.py new file mode 100644 index 0000000..0f075f3 --- /dev/null +++ b/src/sixtypical/evaluator.py @@ -0,0 +1,146 @@ +# encoding: UTF-8 + +from sixtypical.ast import Program, Defn, Routine, Block, Instr +from sixtypical.parser import ConstantRef, LocationRef + + +# TODO: should not inherit from dict +class Context(dict): + def get(self, ref): + if isinstance(ref, ConstantRef): + return ref.value + elif isinstance(ref, LocationRef): + return self[ref.name] + else: + raise ValueError(ref) + + def set(self, ref, value): + assert isinstance(ref, LocationRef) + self[ref.name] = value + + +def eval_program(program): + assert isinstance(program, Program) + routines = {r.name: r for r in program.routines} + context = Context({ + 'a': 0, 'x': 0, 'y': 0, + 'c': 0, 'n': 0, 'z': 0, 'v': 0 + }) + eval_routine(routines['main'], context, routines) + return context + + +def eval_routine(routine, context, routines): + assert isinstance(routine, Routine) + eval_block(routine.block, context, routines) + + +def eval_block(block, context, routines): + assert isinstance(block, Block) + for i in block.instrs: + eval_instr(i, context, routines) + + +def eval_instr(instr, context, routines): + assert isinstance(instr, Instr) + opcode = instr.opcode + dest = instr.dest + src = instr.src + + if opcode == 'ld': + result = context.get(src) + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'st': + context.set(dest, context.get(src)) + elif opcode == 'add': + carry = context['c'] + val = context.get(src) + now = context.get(dest) + result = now + val + carry + if result > 255: + result &= 255 + context['c'] = 1 + else: + context['c'] = 0 + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'sub': + carry = context['c'] + val = context.get(src) + now = context.get(dest) + result = now - val - carry + if result < 0: + result &= 255 + context['c'] = 1 + else: + context['c'] = 0 + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'inc': + val = context.get(dest) + result = (val + 1) & 255 + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'dec': + val = context.get(dest) + result = (val - 1) & 255 + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'cmp': + val = context.get(src) + now = context.get(dest) + result = now - val + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + if result < 0: + result &= 255 + context['c'] = 1 + else: + context['c'] = 0 + elif opcode == 'and': + result = context.get(dest) & context.get(src) + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'or': + result = context.get(dest) | context.get(src) + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'xor': + result = context.get(dest) ^ context.get(src) + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'shl': + val = context.get(dest) + carry = context['c'] + context['c'] = 1 if val & 128 else 0 + result = ((val << 1) + carry) & 255 + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'shr': + val = context.get(dest) + carry = context['c'] + context['c'] = 1 if val & 1 else 0 + result = (val >> 1) + (carry * 128) + context['z'] = 1 if result == 0 else 0 + context['n'] = 1 if result & 128 else 0 + context.set(dest, result) + elif opcode == 'call': + eval_routine(routines[instr.name], context, routines) + elif opcode == 'if': + val = context.get(src) + if val != 0: + eval_block(instr.block1, context, routines) + elif instr.block2: + eval_block(instr.block2, context, routines) + else: + raise NotImplementedError diff --git a/src/sixtypical/objects.py b/src/sixtypical/objects.py new file mode 100644 index 0000000..99df4f2 --- /dev/null +++ b/src/sixtypical/objects.py @@ -0,0 +1,31 @@ + + +class Type(object): + pass + +class BitType(Type): + pass + +class ByteType(Type): + pass + +class MemoryLocation(object): + def __init__(self, name, type_=ByteType, readonly=False): + self.name = name + self.type_ = type_ + self.readonly = readonly + + +regA = MemoryLocation('a') +regX = MemoryLocation('x') +regY = MemoryLocation('y') + +regC = MemoryLocation('c', type=BitType) +regZ = MemoryLocation('z', type=BitType) +regN = MemoryLocation('n', type=BitType) +regV = MemoryLocation('v', type=BitType) + + +class Context(dict): + # maps MemoryLoction -> properties: uninitialized, initialized, written + pass diff --git a/src/sixtypical/parser.py b/src/sixtypical/parser.py new file mode 100644 index 0000000..31ed021 --- /dev/null +++ b/src/sixtypical/parser.py @@ -0,0 +1,217 @@ +# encoding: UTF-8 + +import re + +from sixtypical.ast import Program, Defn, Routine, Block, Instr + + +class Scanner(object): + def __init__(self, text): + self.text = text + self.token = None + self.type = None + self.scan() + + def scan_pattern(self, pattern, type, token_group=1, rest_group=2): + pattern = r'^(' + pattern + r')(.*?)$' + match = re.match(pattern, self.text, re.DOTALL) + if not match: + return False + else: + self.type = type + self.token = match.group(token_group) + self.text = match.group(rest_group) + return True + + def scan(self): + self.scan_pattern(r'[ \t\n\r]*', 'whitespace') + if not self.text: + self.token = None + self.type = 'EOF' + return + if self.scan_pattern(r'\,|\/|\{|\}', 'operator'): + return + if self.scan_pattern(r'\d+', 'integer literal'): + return + if self.scan_pattern(r'\"(.*?)\"', 'string literal', + token_group=2, rest_group=3): + return + if self.scan_pattern(r'\w+', 'identifier'): + return + if self.scan_pattern(r'.', 'unknown character'): + return + else: + raise AssertionError("this should never happen, self.text=(%s)" % self.text) + + def expect(self, token): + if self.token == token: + self.scan() + else: + raise SyntaxError("Expected '%s', but found '%s'" % + (token, self.token)) + + def on(self, token): + return self.token == token + + def on_type(self, type): + return self.type == type + + def check_type(self, type): + if not self.type == type: + raise SyntaxError("Expected %s, but found %s ('%s')" % + (type, self.type, self.token)) + + def consume(self, token): + if self.token == token: + self.scan() + return True + else: + return False + + +# - - - - + + +class LocationRef(object): + def __init__(self, name): + self.name = name + + def __repr__(self): + return 'LocationRef(%r)' % self.name + + +class ConstantRef(object): + def __init__(self, value): + self.value = value + + def __repr__(self): + return 'ConstantRef(%r)' % self.value + + +# - - - - + + +class Parser(object): + def __init__(self, text): + self.scanner = Scanner(text) + self.symbols = {} + + def lookup(self, name): + if name in self.symbols: + return LocationRef(name) + else: + raise KeyError(name) + + def program(self): + defns = [] + routines = [] + while self.scanner.on('byte'): + defn = self.defn() + name = defn.name + if name in self.symbols: + raise KeyError(name) + self.symbols[name] = defn + defns.append(defn) + while self.scanner.on('routine'): + routine = self.routine() + name = routine.name + if name in self.symbols: + raise KeyError(name) + self.symbols[name] = routine + routines.append(routine) + return Program(defns=defns, routines=routines) + + def defn(self): + self.scanner.expect('byte') + name = self.scanner.token + self.scanner.scan() + return Defn(name=name) + + def routine(self): + self.scanner.expect('routine') + name = self.scanner.token + self.scanner.scan() + inputs = [] + outputs = [] + trashes = [] + if self.scanner.consume('inputs'): + inputs = self.locexprs() + if self.scanner.consume('outputs'): + outputs = self.locexprs() + if self.scanner.consume('trashes'): + trashes = self.locexprs() + block = self.block() + return Routine( + name=name, inputs=inputs, outputs=outputs, trashes=trashes, + block=block + ) + + def locexprs(self): + accum = [] + accum.append(self.locexpr()) + while self.scanner.consume(','): + accum.append(self.locexpr()) + return accum + + def locexpr(self): + if self.scanner.token in ('a', 'x', 'y', 'c', 'z', 'n', 'v'): + loc = LocationRef(self.scanner.token) + self.scanner.scan() + return loc + elif self.scanner.token in ('on', 'off'): + loc = ConstantRef(1 if self.scanner.token == 'on' else 0) + self.scanner.scan() + return loc + elif self.scanner.on_type('integer literal'): + loc = ConstantRef(int(self.scanner.token)) + self.scanner.scan() + return loc + else: + loc = self.lookup(self.scanner.token) + self.scanner.scan() + return loc + + def block(self): + instrs = [] + self.scanner.expect('{') + while not self.scanner.on('}'): + instrs.append(self.instr()) + self.scanner.expect('}') + return Block(instrs=instrs) + + def instr(self): + if self.scanner.consume('if'): + src = self.locexpr() + block1 = self.block() + block2 = None + if self.scanner.consume('else'): + block2 = self.block() + return Instr(opcode='if', dest=None, src=src, block1=block1, block2=block2) + elif self.scanner.token in ("ld", "add", "sub", "cmp", "and", "or", "xor"): + opcode = self.scanner.token + self.scanner.scan() + dest = self.locexpr() + self.scanner.expect(',') + src = self.locexpr() + return Instr(opcode=opcode, dest=dest, src=src) + elif self.scanner.token in ("st",): + opcode = self.scanner.token + self.scanner.scan() + src = self.locexpr() + self.scanner.expect(',') + dest = self.locexpr() + return Instr(opcode=opcode, dest=dest, src=src) + elif self.scanner.token in ("shl", "shr", "inc", "dec"): + opcode = self.scanner.token + self.scanner.scan() + dest = self.locexpr() + return Instr(opcode=opcode, dest=dest, src=None) + elif self.scanner.token in ("call"): + opcode = self.scanner.token + self.scanner.scan() + name = self.scanner.token + self.scanner.scan() + # TODO: check that is has been defined + return Instr(opcode=opcode, name=name, dest=None, src=None) + else: + raise ValueError('bad opcode') diff --git a/test.sh b/test.sh index c32edf9..98b1f2b 100755 --- a/test.sh +++ b/test.sh @@ -1,7 +1,5 @@ #!/bin/sh -FILES="doc/Checking.markdown - doc/Emitting.markdown - doc/Instruction_Support.markdown - doc/Analyzing.markdown" -./build.sh && falderal --substring-error ${FILES} +falderal --substring-error \ + tests/SixtyPical\ Execution.md \ + tests/SixtyPical\ Analysis.md diff --git a/tests/SixtyPical Analysis.md b/tests/SixtyPical Analysis.md new file mode 100644 index 0000000..7f90117 --- /dev/null +++ b/tests/SixtyPical Analysis.md @@ -0,0 +1,415 @@ +Sixtypical Analysis +=================== + +This is a test suite, written in [Falderal][] format, for the Sixtypical +static analysis rules. + +[Falderal]: http://catseye.tc/node/Falderal + + -> Functionality "Analyze Sixtypical program" is implemented by + -> shell command "bin/sixtypical --analyze %(test-body-file)" + + -> Tests for functionality "Analyze Sixtypical program" + +### Rudiments ### + +Routines must declare their inputs, outputs, and memory locations they trash. + + | routine up + | inputs a + | outputs a + | trashes c, z, v, n + | { + | st off, c + | add a, 1 + | } + = ok + +Routines may not declare a memory location to be both an output and trashed. + + | routine main + | outputs a + | trashes a + | { + | ld a, 0 + | } + ? UsageClashError: a + +If a routine declares it outputs a location, that location should be initialized. + + | routine main + | outputs a, x, z, n + | { + | ld x, 0 + | } + ? UninitializedAccessError: a + + | routine main + | inputs a + | outputs a + | { + | } + = ok + +If a routine declares it outputs a location, that location may or may not have +been initialized. Trashing is mainly a signal to the caller. + + | routine main + | trashes x, z, n + | { + | ld x, 0 + | } + = ok + + | routine main + | trashes x, z, n + | { + | } + = ok + +If a routine modifies a location, it needs to either output it or trash it. + + | routine main + | { + | ld x, 0 + | } + ? IllegalWriteError: x + + | routine main + | outputs x, z, n + | { + | ld x, 0 + | } + = ok + + | routine main + | trashes x, z, n + | { + | ld x, 0 + | } + = ok + +### ld ### + +Can't `ld` from a memory location that isn't initialized. + + | routine main + | inputs a, x + | trashes a, z, n + | { + | ld a, x + | } + = ok + + | routine main + | inputs a + | trashes a + | { + | ld a, x + | } + ? UninitializedAccessError: x + +Can't `ld` to a memory location that doesn't appear in (outputs ∪ trashes). + + | routine main + | trashes a, z, n + | { + | ld a, 0 + | } + = ok + + | routine main + | outputs a + | trashes z, n + | { + | ld a, 0 + | } + = ok + + | routine main + | outputs z, n + | trashes a + | { + | ld a, 0 + | } + = ok + + | routine main + | trashes z, n + | { + | ld a, 0 + | } + ? IllegalWriteError: a + + | routine main + | trashes a, n + | { + | ld a, 0 + | } + ? IllegalWriteError: z + +### st ### + +Can't `st` from a memory location that isn't initialized. + + | byte lives + | routine main + | inputs x + | trashes lives + | { + | st x, lives + | } + = ok + + | byte lives + | routine main + | trashes x, lives + | { + | st x, lives + | } + ? UninitializedAccessError: x + +Can't `st` to a memory location that doesn't appear in (outputs ∪ trashes). + + | byte lives + | routine main + | trashes lives + | { + | st 0, lives + | } + = ok + + | byte lives + | routine main + | outputs lives + | { + | st 0, lives + | } + = ok + + | byte lives + | routine main + | inputs lives + | { + | st 0, lives + | } + ? IllegalWriteError: lives + +### add ### + +Can't `add` from or to a memory location that isn't initialized. + + | routine main + | inputs a + | outputs a + | trashes c, z, v, n + | { + | st off, c + | add a, 0 + | } + = ok + + | byte lives + | routine main + | inputs a + | outputs a + | trashes c, z, v, n + | { + | st off, c + | add a, lives + | } + ? UninitializedAccessError: lives + + | byte lives + | routine main + | inputs lives + | outputs a + | trashes c, z, v, n + | { + | st off, c + | add a, lives + | } + ? UninitializedAccessError: a + +Can't `add` to a memory location that isn't writeable. + + | routine main + | inputs a + | trashes c + | { + | st off, c + | add a, 0 + | } + ? IllegalWriteError: a + +### ... many missing tests ... ### + +### call ### + +When calling a routine, all of the locations it lists as inputs must be +initialized. + + | byte lives + | + | routine foo + | inputs x + | trashes lives + | { + | st x, lives + | } + | + | routine main + | { + | call foo + | } + ? UninitializedAccessError: x + +Note that if you call a routine that trashes a location, you also trash it. + + | byte lives + | + | routine foo + | inputs x + | trashes lives + | { + | st x, lives + | } + | + | routine main + | outputs x, z, n + | { + | ld x, 0 + | call foo + | } + ? IllegalWriteError: lives + + | byte lives + | + | routine foo + | inputs x + | trashes lives + | { + | st x, lives + | } + | + | routine main + | outputs x, z, n + | trashes lives + | { + | ld x, 0 + | call foo + | } + = ok + +You can't output a value that the thing you called trashed. + + | byte lives + | + | routine foo + | inputs x + | trashes lives + | { + | st x, lives + | } + | + | routine main + | outputs x, z, n, lives + | { + | ld x, 0 + | call foo + | } + ? UninitializedAccessError: lives + +...unless you write to it yourself afterwards. + + | byte lives + | + | routine foo + | inputs x + | trashes lives + | { + | st x, lives + | } + | + | routine main + | outputs x, z, n, lives + | { + | ld x, 0 + | call foo + | st x, lives + | } + = ok + +If a routine declares outputs, they are initialized in the caller after +calling it. + + | routine foo + | outputs x, z, n + | { + | ld x, 0 + | } + | + | routine main + | outputs a + | trashes x, z, n + | { + | call foo + | ld a, x + | } + = ok + + | routine foo + | { + | } + | + | routine main + | outputs a + | trashes x + | { + | call foo + | ld a, x + | } + ? UninitializedAccessError: x + +If a routine trashes locations, they are uninitialized in the caller after +calling it. + + | routine foo + | trashes x, z, n + | { + | ld x, 0 + | } + = ok + + | routine foo + | trashes x, z, n + | { + | ld x, 0 + | } + | + | routine main + | outputs a + | trashes x, z, n + | { + | call foo + | ld a, x + | } + ? UninitializedAccessError: x + +### if ### + +Both blocks of an `if` are analyzed. + + | routine foo + | inputs a + | outputs a + | trashes z, n, c + | { + | cmp a, 42 + | if z { + | ld a, 7 + | } else { + | ld a, 23 + | } + | } + = ok diff --git a/tests/SixtyPical Execution.md b/tests/SixtyPical Execution.md new file mode 100644 index 0000000..968e7f8 --- /dev/null +++ b/tests/SixtyPical Execution.md @@ -0,0 +1,371 @@ +Sixtypical Execution +==================== + +This is a test suite, written in [Falderal][] format, for the dynamic +execution behaviour of the Sixtypical language, disgregarding static analysis. + +[Falderal]: http://catseye.tc/node/Falderal + + -> Functionality "Execute Sixtypical program" is implemented by + -> shell command "bin/sixtypical --execute %(test-body-file)" + + -> Tests for functionality "Execute Sixtypical program" + +Rudimentary program. + + | routine main { + | ld a, 0 + | add a, 1 + | } + = a: 1 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +Program accesses a memory location. + + | byte lives + | + | routine main { + | ld a, 0 + | st a, lives + | ld x, lives + | add x, 1 + | st x, lives + | } + = a: 0 + = c: 0 + = lives: 1 + = n: 0 + = v: 0 + = x: 1 + = y: 0 + = z: 0 + +Can't access an undeclared memory location. + + | routine main { + | ld a, 0 + | st a, lives + | } + ? KeyError + +Can't define two memory locations with the same name. + + | byte lives + | byte lives + | + | routine main { + | ld a, 0 + | st a, lives + | } + ? KeyError + +Add honours carry. + + | routine main { + | ld a, 255 + | st on, c + | add a, 0 + | } + = a: 0 + = c: 1 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 1 + + | routine main { + | ld a, 255 + | st off, c + | add a, 1 + | } + = a: 0 + = c: 1 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 1 + +Subtract honours carry. + + | routine main { + | ld a, 0 + | st on, c + | sub a, 0 + | } + = a: 255 + = c: 1 + = n: 1 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + + | routine main { + | ld a, 0 + | st off, c + | sub a, 1 + | } + = a: 255 + = c: 1 + = n: 1 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +Inc and dec do not honour carry, but do set n and z. + + | routine main { + | ld x, 254 + | st on, c + | inc x + | } + = a: 0 + = c: 1 + = n: 1 + = v: 0 + = x: 255 + = y: 0 + = z: 0 + + | routine main { + | ld y, 1 + | st on, c + | dec y + | } + = a: 0 + = c: 1 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 1 + +Compare affects, but does not use, carry. + + | routine main { + | ld a, 1 + | st on, c + | cmp a, 1 + | } + = a: 1 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 1 + + | routine main { + | ld a, 1 + | st off, c + | cmp a, 5 + | } + = a: 1 + = c: 1 + = n: 1 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +AND. + + | routine main { + | ld a, 15 + | and a, 18 + | } + = a: 2 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +OR. + + | routine main { + | ld a, 34 + | or a, 18 + | } + = a: 50 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +XOR. + + | routine main { + | ld a, 34 + | xor a, 18 + | } + = a: 48 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +Shift left. + + | routine main { + | ld a, 129 + | st off, c + | shl a + | } + = a: 2 + = c: 1 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + + | routine main { + | ld a, 0 + | st on, c + | shl a + | } + = a: 1 + = c: 0 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +Shift right. + + | routine main { + | ld a, 129 + | st off, c + | shr a + | } + = a: 64 + = c: 1 + = n: 0 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + + | routine main { + | ld a, 0 + | st on, c + | shr a + | } + = a: 128 + = c: 0 + = n: 1 + = v: 0 + = x: 0 + = y: 0 + = z: 0 + +Call routine. + + | routine up { + | inc x + | inc y + | } + | routine main { + | ld x, 0 + | ld y, 1 + | call up + | call up + | } + = a: 0 + = c: 0 + = n: 0 + = v: 0 + = x: 2 + = y: 3 + = z: 0 + +Can't call routine that hasn;t been defined. + + | routine main { + | ld x, 0 + | ld y, 1 + | call up + | call up + | } + ? KeyError + +Can't define two routines with the same name. + + | routine main { + | inc x + | inc y + | } + | routine main { + | ld x, 0 + | ld y, 1 + | } + ? KeyError + +If. + + | routine main { + | ld x, 40 + | cmp x, 40 + | if z { + | ld a, 1 + | } else { + | ld a, 8 + | } + | ld x, 2 + | } + = a: 1 + = c: 0 + = n: 0 + = v: 0 + = x: 2 + = y: 0 + = z: 0 + + | routine main { + | ld x, 39 + | cmp x, 40 + | if z { + | ld a, 1 + | } else { + | ld a, 8 + | } + | ld x, 2 + | } + = a: 8 + = c: 1 + = n: 0 + = v: 0 + = x: 2 + = y: 0 + = z: 0 + + | routine main { + | ld x, 39 + | cmp x, 40 + | if z { + | ld a, 1 + | } + | ld x, 2 + | } + = a: 0 + = c: 1 + = n: 0 + = v: 0 + = x: 2 + = y: 0 + = z: 0