added logic in assembler to distinguish between registers and labels

and thus succeeded in getting rid of the register dependency
in the assembler namespace.
This commit is contained in:
Richard Harrington 2013-08-10 01:34:03 -04:00
parent 528b18a45d
commit 94cf5b3f47
3 changed files with 91 additions and 72 deletions

View File

@ -38,11 +38,10 @@
(re-seq-with-pos lex-re line)))
(defn lex
"Lexes a sequence of lines. After this point, tokens
are no longer grouped by line (line numbers have been
captured in metadata, along with column numbers)."
"Lexes a sequence of lines into a sequence of sequences of tokens
(referred to in docstrings for parsing functions as lines of tokens)."
[lines]
(apply concat (map-indexed lex-line lines)))
(map-indexed lex-line lines))
(defn str->int
"Integer/parseInt, but returns nil on failure"
@ -58,16 +57,15 @@
(def return-err (constantly "Invalid word or symbol"))
(defn parse-token
"takes a vector of reg-names and a token with a token-str field and parses the token.
"parses a token with a token-str field.
needs to work with the original token map by using dissoc and into
(rather than building a new one) because it contains line and column
number metadata."
[{token-str :token-str :as token} reg-names]
[{token-str :token-str :as token}]
(let [parser-priority
[[(set reg-names) :register]
[(set commands) :command]
[[(set commands) :command]
[str->int :number]
[valid-word :label]
[valid-word :identifier]
[return-err :error]]]
(some
(fn [[parser token-type]]
@ -76,19 +74,42 @@
:token-str)))
parser-priority)))
(defn parse
"take the tokens and convert them to structured source code ready for compiling.
if there's an error, returns a different type: just the token,
outside of any sequence."
[initial-tokens reg-names]
(defn parse-line
"takes a line of tokens and runs each token through parse-token for the first
pass of determining its type. Then parse-line further divides :identifier
tokens into two types: :label if it's the only thing on its line or it follows
a 'GOTO' or a 'GOSUB', and :register otherwise.
If we encounter an error, just return the token, not a sequence of tokens."
[initial-tokens]
(loop [[token & tail :as tokens] initial-tokens
parsed-tokens []]
(if (empty? tokens)
parsed-tokens
(let [{token-type :type :as parsed-token} (parse-token token reg-names)]
(if (= token-type :error)
parsed-token
(recur tail (conj parsed-tokens parsed-token)))))))
(let [{token-type :type token-val :val :as parsed-token} (parse-token token)]
(case token-type
:error parsed-token
(:command :number) (recur tail (conj parsed-tokens parsed-token))
:identifier (if (or (= (count initial-tokens) 1)
(#{"GOTO" "GOSUB"} (:val (last parsed-tokens))))
(recur tail (conj parsed-tokens (assoc parsed-token :type :label)))
(recur tail (conj parsed-tokens (assoc parsed-token :type :register)))))))))
(defn parse
  "Takes the lines of tokens and converts them to :val and :type format,
  returning them as one flat vector of parsed tokens.
  After this point, tokens are no longer separated into sequences of sequences
  according to the linebreaks in the original source code --
  if we need that information later for error reporting, it's in the metadata.
  If there's an error, this function just returns the offending token,
  outside of any sequence."
  [initial-token-lines]
  (loop [[token-line & tail :as token-lines] initial-token-lines
         parsed-tokens []]
    (if (empty? token-lines)
      parsed-tokens
      (let [parsed-line (parse-line token-line)]
        ; parse-line hands back a bare error token (a map) instead of a vector
        ; of tokens when it fails; keyword lookup on a vector yields nil, so
        ; this test only matches that error token.
        (if (= (:type parsed-line) :error)
          parsed-line
          ; into (eager) rather than concat (lazy): concat inside a loop nests
          ; one lazy seq per source line and can overflow the stack when the
          ; result is finally realized. into also keeps the result a vector
          ; in every case, matching the empty-input return value.
          (recur tail (into parsed-tokens parsed-line)))))))
(defn disambiguate-minus-signs
[initial-tokens]
@ -118,12 +139,12 @@
(if (empty? tokens)
result
(match [token]
[{:type (:or :number :register)}]
(recur tail (conj result [(into token {:val ",", :type :command}) token]))
[(:or {:type :label} {:type :command, :val "ENDSUB"})]
(recur tail (conj result [token nil]))
[{:type :command}]
(recur (rest tail) (conj result [token (first tail)]))))))
[{:type (:or :number :register)}]
(recur tail (conj result [(into token {:val ",", :type :command}) token]))
[(:or {:type :label} {:type :command, :val "ENDSUB"})]
(recur tail (conj result [token nil]))
[{:type :command}]
(recur (rest tail) (conj result [token (first tail)]))))))
; TODO: preserve :line and :pos metadata with labels,
; when labels are transferred from the instruction list to the label map
@ -143,17 +164,16 @@
(recur tail (assoc-in result [:labels (command :val)] next-instr-num))
(recur tail (assoc-in result [:instrs next-instr-num] instr)))))))
(defn assemble [src-code reg-names]
(defn assemble [src-code]
"compiles robotwar code, with error-checking beginning after the lexing
step. All functions that return errors will return a map with the keyword
:error, and then a token with a :val field containing the error string,
and metadata containing :pos and :line fields containing the location.
So far only parse implements error-checking."
(let [parse-with-reg-names #(parse % reg-names)
lexed (-> src-code split-lines strip-comments lex)]
(let [lexed (-> src-code split-lines strip-comments lex)]
(reduce (fn [result step]
(if (= (:type result) :error)
result
(step result)))
lexed
[parse-with-reg-names disambiguate-minus-signs make-instr-pairs map-labels])))
[parse disambiguate-minus-signs make-instr-pairs map-labels])))

View File

@ -15,7 +15,7 @@
:instr-ptr 0
:call-stack []
:registers registers
:obj-code (assembler/assemble src-code (keys registers))})
:obj-code (assembler/assemble src-code)})
(defn resolve-arg [{arg-val :val arg-type :type} registers labels world read-register]
"resolves an instruction argument to a numeric value

View File

@ -1,8 +1,7 @@
(ns robotwar.assembler-test
(:use (clojure [string :only [join]]
[test])
[robotwar.assembler])
(:require [robotwar.register :as register]))
[robotwar.assembler]))
(def line1 "IF DAMAGE # D GOTO MOVE ; comment or something")
(def line2 "AIM-17 TO AIM ; other comment")
@ -15,37 +14,37 @@
(def line-no-comments3 "IF X<-5 GOTO SCAN")
(def multi-line ["SCAN" "6 TO AIM"])
(def lexed-multi-line [{:token-str "SCAN"}
{:token-str "6"}
{:token-str "TO"}
{:token-str "AIM"}])
(def lexed-multi-line [[{:token-str "SCAN"}]
[{:token-str "6"}
{:token-str "TO"}
{:token-str "AIM"}]])
(def lexed-tokens1 [{:token-str "IF"}
{:token-str "DAMAGE"}
{:token-str "#"}
{:token-str "D"}
{:token-str "GOTO"}
{:token-str "MOVE"}])
(def lexed-tokens1 [[{:token-str "IF"}
{:token-str "DAMAGE"}
{:token-str "#"}
{:token-str "D"}
{:token-str "GOTO"}
{:token-str "MOVE"}]])
(def lexed-tokens2 [{:token-str "AIM"}
{:token-str "-"}
{:token-str "17"}
{:token-str "TO"}
{:token-str "AIM"}])
(def lexed-tokens2 [[{:token-str "AIM"}
{:token-str "-"}
{:token-str "17"}
{:token-str "TO"}
{:token-str "AIM"}]])
(def lexed-tokens3 [{:token-str "IF"}
{:token-str "X"}
{:token-str "<"}
{:token-str "-"}
{:token-str "5"}
{:token-str "GOTO"}
{:token-str "SCAN"}])
(def lexed-tokens3 [[{:token-str "IF"}
{:token-str "X"}
{:token-str "<"}
{:token-str "-"}
{:token-str "5"}
{:token-str "GOTO"}
{:token-str "SCAN"}]])
(def lexed-tokens4 [{:token-str "AIM"}
{:token-str "@"}
{:token-str "17"}
{:token-str "TO"}
{:token-str "AIM"}])
(def lexed-tokens4 [[{:token-str "AIM"}
{:token-str "@"}
{:token-str "17"}
{:token-str "TO"}
{:token-str "AIM"}]])
(def parsed-tokens2 [{:val "AIM", :type :register}
{:val "-", :type :command}
@ -212,47 +211,47 @@
(deftest parse-token-register
(testing "parsing register token"
(is (= (parse-token {:token-str "AIM"} register/reg-names)
{:val "AIM", :type :register}))))
(is (= (parse-token {:token-str "AIM"})
{:val "AIM", :type :identifier}))))
(deftest parse-token-command-word
(testing "parsing command token (word)"
(is (= (parse-token {:token-str "GOTO"} register/reg-names)
(is (= (parse-token {:token-str "GOTO"})
{:val "GOTO", :type :command}))))
(deftest parse-token-command-operator
(testing "parsing command token (operator)"
(is (= (parse-token {:token-str "#"} register/reg-names)
(is (= (parse-token {:token-str "#"})
{:val "#", :type :command}))))
(deftest parse-token-number
(testing "parsing number token"
(is (= (parse-token {:token-str "-17"} register/reg-names)
(is (= (parse-token {:token-str "-17"})
{:val -17, :type :number}))))
(deftest parse-token-label
(testing "parsing label token"
(is (= (parse-token {:token-str "SCAN"} register/reg-names)
{:val "SCAN", :type :label}))))
(is (= (parse-token {:token-str "SCAN"})
{:val "SCAN", :type :identifier}))))
(deftest parse-token-error
(testing "parsing error token"
(is (= (parse-token {:token-str "-GOTO"} register/reg-names)
(is (= (parse-token {:token-str "-GOTO"})
{:val "Invalid word or symbol", :type :error}))))
(deftest parse-tokens-minus-sign
(testing "parsing tokens with a binary minus sign"
(is (= (parse lexed-tokens2 register/reg-names)
(is (= (parse lexed-tokens2)
parsed-tokens2))))
(deftest parse-tokens-negative-sign
(testing "parsing tokens with a unary negative sign"
(is (= (parse lexed-tokens3 register/reg-names)
(is (= (parse lexed-tokens3)
parsed-tokens3))))
(deftest parse-tokens-error
(testing "parsing tokens with an invalid operator"
(is (= (parse lexed-tokens4 register/reg-names)
(is (= (parse lexed-tokens4)
parsed-tokens4))))
(def minus-sign-disambiguated-tokens2 parsed-tokens2)
@ -294,17 +293,17 @@
(deftest assemble-test-success
(testing "compiling successfully"
(is (= (assemble (join "\n" [line1 line2 line3]) register/reg-names)
(is (= (assemble (join "\n" [line1 line2 line3]))
multi-line-assembled))))
(deftest assemble-test-failure
(testing "assemble results in error"
(is (= (assemble (join "\n" [line1 line2 line3 line4]) register/reg-names)
(is (= (assemble (join "\n" [line1 line2 line3 line4]))
multi-line-assembled-error))))
(deftest preserving-line-and-pos-metadata-test
(testing "line and pos metadata preserved through assembly process"
(is (= (meta (get-in (assemble (join "\n" [line1 line2 line3]) register/reg-names)
(is (= (meta (get-in (assemble (join "\n" [line1 line2 line3]))
[:instrs 8 1]))
{:line 3, :pos 14}))))