Added logic to the assembler to distinguish between registers and labels,

and thus succeeded in removing the register dependency from the assembler namespace.
2024-05-28 23:41:31 +00:00 · 2013-08-10 01:34:03 -04:00 · 2013-08-10 01:34:03 -04:00 · 94cf5b3f47
commit 94cf5b3f47
parent 528b18a45d
3 changed files with 91 additions and 72 deletions
--- a/src/robotwar/assembler.clj
+++ b/src/robotwar/assembler.clj
@ -38,11 +38,10 @@
       (re-seq-with-pos lex-re line)))

 (defn lex
-  "Lexes a sequence of lines. After this point, tokens 
-  are no longer grouped by line (line numbers have been 
-  captured in metadata, along with column numbers)."
+  "Lexes a sequence of lines into a sequence of sequences of tokens
+  (referred to in docstrings for parsing functions as lines of tokens)."
  [lines]
-  (apply concat (map-indexed lex-line lines)))
+  (map-indexed lex-line lines))

 (defn str->int
  "Integer/parseInt, but returns nil on failure"
@ -58,16 +57,15 @@
 (def return-err (constantly "Invalid word or symbol"))

 (defn parse-token
-  "takes a vector of reg-names and a token with a token-str field and parses the token.
+  "parses a token with a token-str field.
  needs to work with the original token map by using dissoc and into
  (rather than building a new one) because it contains line and column
  number metadata."
-  [{token-str :token-str :as token} reg-names]
+  [{token-str :token-str :as token}]
  (let [parser-priority 
-        [[(set reg-names)  :register]
-         [(set commands)   :command]
+        [[(set commands)   :command]
         [str->int         :number]
-         [valid-word       :label]
+         [valid-word       :identifier]
         [return-err       :error]]]
    (some
      (fn [[parser token-type]]
@ -76,19 +74,42 @@
                  :token-str)))
      parser-priority)))

-(defn parse
-  "take the tokens and convert them to structured source code ready for compiling.
-  if there's an error, returns a different type: just the token,
-  outside of any sequence."
-  [initial-tokens reg-names]
+(defn parse-line
+  "Parses one line of lexed tokens into a vector of typed tokens.
+
+  Each token is first classified by parse-token as :command, :number,
+  :identifier or :error. An :identifier is then refined to :label when it
+  is the only token on its line or directly follows a 'GOTO' or 'GOSUB'
+  command, and to :register otherwise.
+
+  Returns the vector of parsed tokens, or -- on the first :error token --
+  just that error token, outside of any sequence."
+  [initial-tokens]
+  ;; Whether the line holds a single token never changes during the loop,
+  ;; so compute it once up front.
+  (let [sole-token? (= (count initial-tokens) 1)]
+    (loop [[token & tail :as tokens] initial-tokens
+           parsed-tokens []]
+      (if (empty? tokens)
+        parsed-tokens
+        (let [{token-type :type :as parsed-token} (parse-token token)]
+          (case token-type
+            :error parsed-token
+            (:command :number) (recur tail (conj parsed-tokens parsed-token))
+            :identifier
+            ;; peek is the constant-time way to read a vector's last element.
+            (let [jump-target? (#{"GOTO" "GOSUB"} (:val (peek parsed-tokens)))
+                  refined-type (if (or sole-token? jump-target?) :label :register)]
+              (recur tail (conj parsed-tokens
+                                (assoc parsed-token :type refined-type))))))))))
+       
+(defn parse
+  "Parses lines of lexed tokens into a single flat vector of tokens in
+  :val and :type format.
+
+  After this point tokens are no longer grouped into one sequence per source
+  line; if that information is needed later for error reporting, it is in
+  the metadata.
+
+  If any line fails to parse, returns just the offending error token,
+  outside of any sequence."
+  [initial-token-lines]
+  (loop [[token-line & tail :as token-lines] initial-token-lines
+         parsed-tokens []]
+    (if (empty? token-lines)
+      parsed-tokens
+      (let [parsed-line (parse-line token-line)]
+        (if (= (:type parsed-line) :error)
+          parsed-line
+          ;; into keeps the accumulator an eager vector; repeatedly wrapping
+          ;; it in a lazy concat nests one thunk per line and can overflow
+          ;; the stack when a long program is finally realized.
+          (recur tail (into parsed-tokens parsed-line)))))))

 (defn disambiguate-minus-signs
  [initial-tokens]
@ -118,12 +139,12 @@
    (if (empty? tokens)
      result
      (match [token]
-             [{:type (:or :number :register)}] 
-             (recur tail (conj result [(into token {:val ",", :type :command}) token]))
-             [(:or {:type :label} {:type :command, :val "ENDSUB"})]
-             (recur tail (conj result [token nil]))
-             [{:type :command}] 
-             (recur (rest tail) (conj result [token (first tail)]))))))
+        [{:type (:or :number :register)}] 
+          (recur tail (conj result [(into token {:val ",", :type :command}) token]))
+        [(:or {:type :label} {:type :command, :val "ENDSUB"})]
+          (recur tail (conj result [token nil]))
+        [{:type :command}] 
+          (recur (rest tail) (conj result [token (first tail)]))))))

 ; TODO: preserve :line and :pos metadata with labels,
 ; when labels are transferred from the instruction list to the label map
@ -143,17 +164,16 @@
          (recur tail (assoc-in result [:labels (command :val)] next-instr-num))
          (recur tail (assoc-in result [:instrs next-instr-num] instr)))))))

-(defn assemble [src-code reg-names]
+(defn assemble [src-code]
  "compiles robotwar code, with error-checking beginning after the lexing
  step. All functions that return errors will return a map with the keyword 
  :error, and then a token with a :val field containing the error string, 
  and metadata containing :pos and :line fields containing the location. 
  So far only parse implements error-checking."
-  (let [parse-with-reg-names #(parse % reg-names)
-        lexed (-> src-code split-lines strip-comments lex)]
+  (let [lexed (-> src-code split-lines strip-comments lex)]
    (reduce (fn [result step]
              (if (= (:type result) :error)
                result
                (step result)))
            lexed
-            [parse-with-reg-names disambiguate-minus-signs make-instr-pairs map-labels])))
+            [parse disambiguate-minus-signs make-instr-pairs map-labels])))
--- a/src/robotwar/brain.clj
+++ b/src/robotwar/brain.clj
@ -15,7 +15,7 @@
   :instr-ptr 0
   :call-stack []
   :registers registers
-   :obj-code (assembler/assemble src-code (keys registers))})
+   :obj-code (assembler/assemble src-code)})

 (defn resolve-arg [{arg-val :val arg-type :type} registers labels world read-register]
  "resolves an instruction argument to a numeric value
--- a/test/robotwar/assembler_test.clj
+++ b/test/robotwar/assembler_test.clj
@ -1,8 +1,7 @@
 (ns robotwar.assembler-test
  (:use (clojure [string :only [join]]
                 [test])
-        [robotwar.assembler])
-  (:require [robotwar.register :as register]))
+        [robotwar.assembler]))

 (def line1 "IF DAMAGE # D GOTO MOVE    ; comment or something")
 (def line2 "AIM-17 TO AIM              ; other comment")
@ -15,37 +14,37 @@
 (def line-no-comments3 "IF X<-5 GOTO SCAN")

 (def multi-line ["SCAN" "6 TO AIM"])
-(def lexed-multi-line [{:token-str "SCAN"}
-                       {:token-str "6"}
-                       {:token-str "TO"}
-                       {:token-str "AIM"}])
+(def lexed-multi-line [[{:token-str "SCAN"}]
+                       [{:token-str "6"}
+                        {:token-str "TO"}
+                        {:token-str "AIM"}]])

-(def lexed-tokens1 [{:token-str "IF"} 
-                    {:token-str "DAMAGE"} 
-                    {:token-str "#"} 
-                    {:token-str "D"} 
-                    {:token-str "GOTO"} 
-                    {:token-str "MOVE"}])
+(def lexed-tokens1 [[{:token-str "IF"} 
+                     {:token-str "DAMAGE"} 
+                     {:token-str "#"} 
+                     {:token-str "D"} 
+                     {:token-str "GOTO"} 
+                     {:token-str "MOVE"}]])

-(def lexed-tokens2 [{:token-str "AIM"} 
-                    {:token-str "-"} 
-                    {:token-str "17"} 
-                    {:token-str "TO"} 
-                    {:token-str "AIM"}])
+(def lexed-tokens2 [[{:token-str "AIM"} 
+                     {:token-str "-"} 
+                     {:token-str "17"} 
+                     {:token-str "TO"} 
+                     {:token-str "AIM"}]])

-(def lexed-tokens3 [{:token-str "IF"} 
-                    {:token-str "X"} 
-                    {:token-str "<"} 
-                    {:token-str "-"} 
-                    {:token-str "5"} 
-                    {:token-str "GOTO"} 
-                    {:token-str "SCAN"}])
+(def lexed-tokens3 [[{:token-str "IF"} 
+                     {:token-str "X"} 
+                     {:token-str "<"} 
+                     {:token-str "-"} 
+                     {:token-str "5"} 
+                     {:token-str "GOTO"} 
+                     {:token-str "SCAN"}]])

-(def lexed-tokens4 [{:token-str "AIM"} 
-                    {:token-str "@"} 
-                    {:token-str "17"} 
-                    {:token-str "TO"} 
-                    {:token-str "AIM"}])
+(def lexed-tokens4 [[{:token-str "AIM"} 
+                     {:token-str "@"} 
+                     {:token-str "17"} 
+                     {:token-str "TO"} 
+                     {:token-str "AIM"}]])

 (def parsed-tokens2 [{:val "AIM", :type :register} 
                     {:val "-", :type :command} 
@ -212,47 +211,47 @@

 (deftest parse-token-register
  (testing "parsing register token"
-    (is (= (parse-token {:token-str "AIM"} register/reg-names)
-           {:val "AIM", :type :register}))))
+    (is (= (parse-token {:token-str "AIM"})
+           {:val "AIM", :type :identifier}))))

 (deftest parse-token-command-word
  (testing "parsing command token (word)"
-    (is (= (parse-token {:token-str "GOTO"} register/reg-names)
+    (is (= (parse-token {:token-str "GOTO"})
           {:val "GOTO", :type :command}))))

 (deftest parse-token-command-operator
  (testing "parsing command token (operator)"
-    (is (= (parse-token {:token-str "#"} register/reg-names)
+    (is (= (parse-token {:token-str "#"})
           {:val "#", :type :command}))))

 (deftest parse-token-number
  (testing "parsing number token"
-    (is (= (parse-token {:token-str "-17"} register/reg-names)
+    (is (= (parse-token {:token-str "-17"})
           {:val -17, :type :number}))))

 (deftest parse-token-label
  (testing "parsing label token"
-    (is (= (parse-token {:token-str "SCAN"} register/reg-names)
-           {:val "SCAN", :type :label}))))
+    (is (= (parse-token {:token-str "SCAN"})
+           {:val "SCAN", :type :identifier}))))

 (deftest parse-token-error
  (testing "parsing error token"
-    (is (= (parse-token {:token-str "-GOTO"} register/reg-names)
+    (is (= (parse-token {:token-str "-GOTO"})
           {:val "Invalid word or symbol", :type :error}))))

 (deftest parse-tokens-minus-sign
  (testing "parsing tokens with a binary minus sign"
-    (is (= (parse lexed-tokens2 register/reg-names)
+    (is (= (parse lexed-tokens2)
           parsed-tokens2))))

 (deftest parse-tokens-negative-sign
  (testing "parsing tokens with a unary negative sign"
-    (is (= (parse lexed-tokens3 register/reg-names)
+    (is (= (parse lexed-tokens3)
           parsed-tokens3))))

 (deftest parse-tokens-error
  (testing "parsing tokens with an invalid operator"
-    (is (= (parse lexed-tokens4 register/reg-names)
+    (is (= (parse lexed-tokens4)
           parsed-tokens4))))

 (def minus-sign-disambiguated-tokens2 parsed-tokens2)
@ -294,17 +293,17 @@

 (deftest assemble-test-success
  (testing "compiling successfully"
-    (is (= (assemble (join "\n" [line1 line2 line3]) register/reg-names)
+    (is (= (assemble (join "\n" [line1 line2 line3]))
           multi-line-assembled))))

 (deftest assemble-test-failure
  (testing "assemble results in error"
-    (is (= (assemble (join "\n" [line1 line2 line3 line4]) register/reg-names)
+    (is (= (assemble (join "\n" [line1 line2 line3 line4]))
           multi-line-assembled-error))))

 (deftest preserving-line-and-pos-metadata-test
  (testing "line and pos metadata preserved through assembly process"
-    (is (= (meta (get-in (assemble (join "\n" [line1 line2 line3]) register/reg-names)
+    (is (= (meta (get-in (assemble (join "\n" [line1 line2 line3]))
                         [:instrs 8 1]))
           {:line 3, :pos 14}))))