157 lines
5.8 KiB
Clojure
157 lines
5.8 KiB
Clojure
(ns robotwar.foundry
|
|
(:require [robotwar.kernel-lexicon])
|
|
(:use (clojure [string :only [split join]]
|
|
[pprint :only [pprint]])
|
|
[clojure.core.match :only [match]]))
|
|
|
|
(defn re-seq-with-pos
|
|
"Returns a lazy sequence of successive matches of pattern in string with position.
|
|
Largely stolen from re-seq source."
|
|
[^java.util.regex.Pattern re s]
|
|
(let [m (re-matcher re s)]
|
|
((fn step []
|
|
(when (.find m)
|
|
(cons [(re-groups m) (.start m)] (lazy-seq (step))))))))
|
|
|
|
(defn split-lines
|
|
[raw-lines]
|
|
(split raw-lines #"\n"))
|
|
|
|
(defn strip-comments
|
|
[lines]
|
|
(map #(re-find #"[^;]*" %) lines))
|
|
|
|
(def lex-re
|
|
(let [op-string (join robotwar.kernel-lexicon/op-commands)]
|
|
(re-pattern (str "[" op-string "]|[^" op-string "\\s]+"))))
|
|
|
|
(defn lex-line
|
|
"Helper function for lex. Note: metadata fields :line and :pos
|
|
are intended to be human-readable for error-reporting
|
|
purposes, so they're indexed from 1."
|
|
[line-num line]
|
|
(map (fn [[s n]]
|
|
^{:line (inc line-num), :pos (inc n)} {:token-str s})
|
|
(re-seq-with-pos lex-re line)))
|
|
|
|
(defn lex
|
|
"Lexes a sequence of lines. After this point, tokens
|
|
are no longer grouped by line (line numbers have been
|
|
captured in metadata, along with column numbers)."
|
|
[lines]
|
|
(apply concat (map-indexed lex-line lines)))
|
|
|
|
(defn str->int
|
|
"Integer/parseInt, but returns nil on failure"
|
|
[s-raw]
|
|
(try (Integer/parseInt s-raw)
|
|
(catch Exception e nil)))
|
|
|
|
(defn valid-word
|
|
"Capital letters and numbers, starting with a capital letter"
|
|
[s]
|
|
(re-matches #"^[A-Z][A-Z\d]*" s))
|
|
|
|
(def return-err (constantly "Invalid word or symbol"))
|
|
|
|
(defn parse-token
|
|
"takes a vector of reg-names and a token with a token-str field and parses the token.
|
|
needs to work with the original token map by using dissoc and into
|
|
(rather than building a new one) because it contains line and column
|
|
number metadata."
|
|
[reg-names {token-str :token-str :as token}]
|
|
(let [parser-priority
|
|
[[(set reg-names) :register]
|
|
[(set robotwar.kernel-lexicon/commands) :command]
|
|
[str->int :number]
|
|
[valid-word :label]
|
|
[return-err :error]]]
|
|
(some
|
|
(fn [[parser token-type]]
|
|
(when-let [token-val (parser token-str)]
|
|
(dissoc (into token {:val token-val, :type token-type})
|
|
:token-str)))
|
|
parser-priority)))
|
|
|
|
(defn parse
|
|
"take the tokens and convert them to structured source code ready for compiling.
|
|
if there's an error, returns a different type: just the token,
|
|
outside of any sequence."
|
|
[reg-names initial-tokens]
|
|
(loop [[token & tail :as tokens] initial-tokens
|
|
parsed-tokens []]
|
|
(if (empty? tokens)
|
|
parsed-tokens
|
|
(let [{token-type :type :as parsed-token} (parse-token reg-names token)]
|
|
(if (= token-type :error)
|
|
parsed-token
|
|
(recur tail (conj parsed-tokens parsed-token)))))))
|
|
|
|
(defn disambiguate-minus-signs
|
|
[initial-tokens]
|
|
(loop [tokens initial-tokens
|
|
results []]
|
|
(let [{prev-type :type} (last results)
|
|
[{current-val :val :as current-token}
|
|
& [{next-val :val, next-type :type :as next-token} :as tail]] tokens]
|
|
(cond
|
|
(empty? tokens) results
|
|
(and (= current-val "-")
|
|
(= next-type :number)
|
|
(not (#{:number :register} prev-type)))
|
|
(recur (rest tail)
|
|
(conj results (into current-token {:val (- next-val), :type :number})))
|
|
:otherwise (recur tail (conj results current-token))))))
|
|
|
|
(defn make-instr-pairs
|
|
"Compiles the tokens into token-pairs. Commands consume the next token.
|
|
When values are encountered that are not arguments to commands,
|
|
a special token-pair is created that is a comma followed by the value
|
|
(meaning push the value into the accumulator). The comma command re-uses
|
|
the same :line and :pos metadata from the token containing the value that is being pushed."
|
|
[initial-tokens]
|
|
(loop [[token & tail :as tokens] initial-tokens
|
|
result []]
|
|
(if (empty? tokens)
|
|
result
|
|
(match [token]
|
|
[{:type (:or :number :register)}]
|
|
(recur tail (conj result [(into token {:val ",", :type :command}) token]))
|
|
[(:or {:type :label} {:type :command, :val "ENDSUB"})]
|
|
(recur tail (conj result [token nil]))
|
|
[{:type :command}]
|
|
(recur (rest tail) (conj result [token (first tail)]))))))
|
|
|
|
; TODO: preserve :line and :pos metadata with labels,
|
|
; when labels are transferred from the instruction list to the label map
|
|
|
|
(defn map-labels
|
|
"Maps label-names to their appropriate indexes in the instruction list,
|
|
and remove the labels from the instruction list itself (except as targets)"
|
|
[initial-instrs]
|
|
(loop [[instr & tail :as instrs] initial-instrs
|
|
result {:labels {}
|
|
:instrs []}]
|
|
(if (empty? instrs)
|
|
result
|
|
(let [command (first instr)
|
|
next-instr-num (count (result :instrs))]
|
|
(if (= (command :type) :label)
|
|
(recur tail (assoc-in result [:labels (command :val)] next-instr-num))
|
|
(recur tail (assoc-in result [:instrs next-instr-num] instr)))))))
|
|
|
|
(defn assemble [reg-names string]
|
|
"compiles robotwar code, with error-checking beginning after the lexing
|
|
step. All functions that return errors will return a map with the keyword
|
|
:error, and then a token with a :val field containing the error string,
|
|
and metadata containing :pos and :line fields containing the location.
|
|
So far only parse implements error-checking."
|
|
(let [parse-with-reg-names (partial parse reg-names)
|
|
lexed (-> string split-lines strip-comments lex)]
|
|
(reduce (fn [result step]
|
|
(if (= (:type result) :error)
|
|
result
|
|
(step result)))
|
|
lexed
|
|
[parse-with-reg-names disambiguate-minus-signs make-instr-pairs map-labels])))
|