finished converting the lexer to regexp, eliminating the need for core.match

Richard Harrington 2013-07-13 13:00:38 -04:00
parent 5cbda73aba
commit 0df38b12b8
2 changed files with 25 additions and 43 deletions

project.clj View File

@@ -3,6 +3,5 @@
   :url "http://example.com/FIXME"
   :license {:name "Eclipse Public License"
             :url "http://www.eclipse.org/legal/epl-v10.html"}
-  :dependencies [[org.clojure/clojure "1.5.1"]
-                 [org.clojure/core.match "0.2.0-rc3"]]
+  :dependencies [[org.clojure/clojure "1.5.1"]]
   :main hs-robotwar.core)

src/hs_robotwar/core.clj View File

@@ -1,6 +1,5 @@
 (ns hs-robotwar.core)
-(use '[clojure.core.match :only (match)])
 (use '[clojure.set :only (union)])
 (use '[clojure.string :only (split join)])
@@ -14,46 +13,34 @@
 (def commands (union operators-set #{"TO" "IF" "GOTO" "GOSUB" "ENDSUB"}))
-; TODO: FINISH THIS METHOD, AND CONVERT THE WHOLE LEXER TO REGEX
 (defn re-seq-with-pos
-  [initial-s]
-  (let [len (count initial-s)]
-    (loop [s initial-s, pos 0, acc []]
-      (cond
-        (empty? s) acc
-        (
"returns a sequence of 2-element vectors: [match position]"
[re initial-s]
(loop [s initial-s,
end-of-previous-word 0
acc []]
(if (empty? s)
acc
(if-let [next-match (re-find re s)]
(let [idx-start (.indexOf s next-match)
idx-end (+ idx-start (count next-match))]
(recur (subs s idx-end)
(+ end-of-previous-word idx-end)
(conj acc [next-match (+ end-of-previous-word idx-start)])))
acc))))
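
A quick REPL sketch of the new re-seq-with-pos (the token pattern here is an assumed stand-in, since lex-re itself is not part of this diff):

;; demo-re is a hypothetical token pattern, for illustration only
(def demo-re #"[A-Za-z]+|\d+|[-+*/=<>#]")
(re-seq-with-pos demo-re "IF DAMAGE # D GOTO MOVE")
;; => [["IF" 0] ["DAMAGE" 3] ["#" 10] ["D" 12] ["GOTO" 14] ["MOVE" 19]]
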
-(defn conj-with-metadata
-  [coll s n]
-  (conj coll {:token-str s, :pos n}))
+(defn build-lex-metadata
+  [s n]
+  {:token-str s, :pos n})
 (defn lex-line
-  [initial-line]
-  (loop
-    [[head & tail :as line] initial-line
-     partial-token []
-     saved-pos 0
-     result []]
-    (let [close-partial-token (fn [] (conj-with-metadata result (apply str partial-token) saved-pos))
-          current-pos (- (count initial-line) (count line))
-          parsing-token? (not (empty? partial-token))]
-      (match [head parsing-token?]
-        [(:or \; nil)         true ] (close-partial-token)
-        [(:or \; nil)         false] result
-        [(:or \space \t)      true ] (recur tail [] nil (close-partial-token))
-        [(:or \space \t)      false] (recur tail [] nil result)
-        ; if it's an operator and we're currently parsing a token,
-        ; close the partial token and recur on the same character.
-        [(_ :guard operators) true ] (recur line [] nil (close-partial-token))
-        [(_ :guard operators) false] (recur tail
-                                            []
-                                            nil
-                                            (conj-with-metadata result (str head) current-pos))
-        [_ true ] (recur tail (conj partial-token head) saved-pos result)
-        [_ false] (recur tail (conj partial-token head) current-pos result)))))
+  [line]
+  (map #(apply build-lex-metadata %)
+       (re-seq-with-pos lex-re line)))
+(defn lex
+  [src-code]
+  (mapcat lex-line (split src-code #"\n")))
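
With the new pipeline, lex should return one flat sequence of {:token-str, :pos} maps, with positions counted per line; a hypothetical call (again assuming a lex-re along the lines of the pattern sketched above):

(lex "IF DAMAGE # D GOTO MOVE\nGOTO SCAN")
;; => ({:token-str "IF", :pos 0} {:token-str "DAMAGE", :pos 3}
;;     {:token-str "#", :pos 10} {:token-str "D", :pos 12}
;;     {:token-str "GOTO", :pos 14} {:token-str "MOVE", :pos 19}
;;     {:token-str "GOTO", :pos 0} {:token-str "SCAN", :pos 5})
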
 (defn merge-lexed-tokens
   "helper function for conjoining minus signs to next token
@@ -62,10 +49,6 @@
   {:token-str (str (:token-str first-token) (:token-str second-token))
    :pos (:pos first-token)})
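
Judging from the body shown above, merge-lexed-tokens folds two adjacent tokens into one, keeping the first token's position; a hypothetical example:

(merge-lexed-tokens {:token-str "-", :pos 8} {:token-str "42", :pos 9})
;; => {:token-str "-42", :pos 8}
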
-(defn lex
-  [src-code]
-  (mapcat lex-line (split src-code #"\n")))
 (defn str->int
   "Like Integer/parseInt, but returns nil on failure"
   [s]