robotwar/src/robotwar/foundry.clj

157 lines
5.8 KiB
Clojure

(ns robotwar.foundry
(:require [robotwar.kernel-lexicon])
(:use (clojure [string :only [split join]]
[pprint :only [pprint]])
[clojure.core.match :only [match]]))
(defn re-seq-with-pos
"Returns a lazy sequence of successive matches of pattern in string with position.
Largely stolen from re-seq source."
[^java.util.regex.Pattern re s]
(let [m (re-matcher re s)]
((fn step []
(when (.find m)
(cons [(re-groups m) (.start m)] (lazy-seq (step))))))))
(defn split-lines
[raw-lines]
(split raw-lines #"\n"))
(defn strip-comments
[lines]
(map #(re-find #"[^;]*" %) lines))
(def lex-re
(let [op-string (join robotwar.kernel-lexicon/op-commands)]
(re-pattern (str "[" op-string "]|[^" op-string "\\s]+"))))
(defn lex-line
"Helper function for lex. Note: metadata fields :line and :pos
are intended to be human-readable for error-reporting
purposes, so they're indexed from 1."
[line-num line]
(map (fn [[s n]]
^{:line (inc line-num), :pos (inc n)} {:token-str s})
(re-seq-with-pos lex-re line)))
(defn lex
"Lexes a sequence of lines. After this point, tokens
are no longer grouped by line (line numbers have been
captured in metadata, along with column numbers)."
[lines]
(apply concat (map-indexed lex-line lines)))
(defn str->int
"Integer/parseInt, but returns nil on failure"
[s-raw]
(try (Integer/parseInt s-raw)
(catch Exception e nil)))
(defn valid-word
"Capital letters and numbers, starting with a capital letter"
[s]
(re-matches #"^[A-Z][A-Z\d]*" s))
(def return-err (constantly "Invalid word or symbol"))
(defn parse-token
"takes a vector of reg-names and a token with a token-str field and parses the token.
needs to work with the original token map by using dissoc and into
(rather than building a new one) because it contains line and column
number metadata."
[reg-names {token-str :token-str :as token}]
(let [parser-priority
[[(set reg-names) :register]
[(set robotwar.kernel-lexicon/commands) :command]
[str->int :number]
[valid-word :label]
[return-err :error]]]
(some
(fn [[parser token-type]]
(when-let [token-val (parser token-str)]
(dissoc (into token {:val token-val, :type token-type})
:token-str)))
parser-priority)))
(defn parse
"take the tokens and convert them to structured source code ready for compiling.
if there's an error, returns a different type: just the token,
outside of any sequence."
[reg-names initial-tokens]
(loop [[token & tail :as tokens] initial-tokens
parsed-tokens []]
(if (empty? tokens)
parsed-tokens
(let [{token-type :type :as parsed-token} (parse-token reg-names token)]
(if (= token-type :error)
parsed-token
(recur tail (conj parsed-tokens parsed-token)))))))
(defn disambiguate-minus-signs
[initial-tokens]
(loop [tokens initial-tokens
results []]
(let [{prev-type :type} (last results)
[{current-val :val :as current-token}
& [{next-val :val, next-type :type :as next-token} :as tail]] tokens]
(cond
(empty? tokens) results
(and (= current-val "-")
(= next-type :number)
(not (#{:number :register} prev-type)))
(recur (rest tail)
(conj results (into current-token {:val (- next-val), :type :number})))
:otherwise (recur tail (conj results current-token))))))
(defn make-instr-pairs
"Compiles the tokens into token-pairs. Commands consume the next token.
When values are encountered that are not arguments to commands,
a special token-pair is created that is a comma followed by the value
(meaning push the value into the accumulator). The comma command re-uses
the same :line and :pos metadata from the token containing the value that is being pushed."
[initial-tokens]
(loop [[token & tail :as tokens] initial-tokens
result []]
(if (empty? tokens)
result
(match [token]
[{:type (:or :number :register)}]
(recur tail (conj result [(into token {:val ",", :type :command}) token]))
[(:or {:type :label} {:type :command, :val "ENDSUB"})]
(recur tail (conj result [token nil]))
[{:type :command}]
(recur (rest tail) (conj result [token (first tail)]))))))
; TODO: preserve :line and :pos metadata with labels,
; when labels are transferred from the instruction list to the label map
(defn map-labels
"Maps label-names to their appropriate indexes in the instruction list,
and remove the labels from the instruction list itself (except as targets)"
[initial-instrs]
(loop [[instr & tail :as instrs] initial-instrs
result {:labels {}
:instrs []}]
(if (empty? instrs)
result
(let [command (first instr)
next-instr-num (count (result :instrs))]
(if (= (command :type) :label)
(recur tail (assoc-in result [:labels (command :val)] next-instr-num))
(recur tail (assoc-in result [:instrs next-instr-num] instr)))))))
(defn assemble [reg-names string]
"compiles robotwar code, with error-checking beginning after the lexing
step. All functions that return errors will return a map with the keyword
:error, and then a token with a :val field containing the error string,
and metadata containing :pos and :line fields containing the location.
So far only parse implements error-checking."
(let [parse-with-reg-names (partial parse reg-names)
lexed (-> string split-lines strip-comments lex)]
(reduce (fn [result step]
(if (= (:type result) :error)
result
(step result)))
lexed
[parse-with-reg-names disambiguate-minus-signs make-instr-pairs map-labels])))