1
0
mirror of https://github.com/g012/l65.git synced 2024-06-11 10:29:27 +00:00
l65/macro/lexer.lua

180 lines
5.9 KiB
Lua

--[[--- A Lua lexical scanner using LPeg.
= CREDITS
Written by Peter Odding, 2007/04/04
= THANKS TO
- the Lua authors for a wonderful language;
- Roberto for LPeg;
- caffeine for keeping me awake :)
= LICENSE
Shamelessly ripped from the SQLite[3] project:
The author disclaims copyright to this source code. In place of a legal
notice, here is a blessing:
May you do good and not evil.
May you find forgiveness for yourself and forgive others.
May you share freely, never taking more than you give.
@module macro.lexer
--]]
local lexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cb, Cc, Cg, Cmt, Ct =
lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Ct
-- create a pattern which captures the lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end
-- private interface
local table_of_tokens
local extra_tokens
function lexer.add_extra_tokens(extra)
extra_tokens = extra_tokens or {}
for _,t in ipairs(extra) do
table.insert(extra_tokens,t)
end
table_of_tokens = nil -- re-initialize
end
function lexer.init ()
local digit = R('09')
-- range of valid characters after first character of identifier
--local idsafe = R('AZ', 'az', '\127\255') + P '_'
local idsafe = R('AZ', 'az') + P '_' + R '\206\223' * R '\128\255'
-- operators
local OT = P '=='
if extra_tokens then
for _,ex in ipairs(extra_tokens) do
OT = OT + P(ex)
end
end
local operator = token('operator', OT + P '.' + P '~=' + P '<=' + P '>=' + P '...'
+ P '..' + S '+-*/%^#=<>;:,.{}[]()')
-- identifiers
local ident = token('iden', idsafe * (idsafe + digit) ^ 0)
-- keywords
local keyword = token('keyword', (P 'and' + P 'break' + P 'do' + P 'elseif' +
P 'else' + P 'end' + P 'false' + P 'for' + P 'function' + P 'if' +
P 'in' + P 'local' + P 'nil' + P 'not' + P 'or' + P 'repeat' + P 'return' +
P 'then' + P 'true' + P 'until' + P 'while') * -(idsafe + digit))
-- numbers
local number_sign = S'+-'^-1
local number_decimal = digit ^ 1
local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
(S'eE' * number_sign * digit^1)^-1
local number = token('number', number_hexadecimal +
number_float +
number_decimal)
-- callback for [=[ long strings ]=]
-- ps. LPeg is for Lua what regex is for Perl, which makes me smile :)
local equals = P '=' ^ 0
local open = P '[' * Cg(equals, "init") * P '[' * P '\n' ^ -1
local close = P ']' * C(equals) * P ']'
local closeeq = Cmt(close * Cb "init", function (s, i, a, b) return a == b end)
local longstring = open * C((P(1) - closeeq)^0) * close --/ 1
-- strings
local singlequoted_string = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
local doublequoted_string = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
local string = token('string', singlequoted_string +
doublequoted_string +
longstring)
-- comments
local singleline_comment = P '--' * (1 - S '\r\n\f') ^ 0
local multiline_comment = P '--' * longstring
local comment = token('comment', multiline_comment + singleline_comment)
-- whitespace
local whitespace = token('space', S('\r\n\f\t ')^1)
-- ordered choice of all tokens and last-resort error which consumes one character
local any_token = whitespace + number + keyword + ident +
string + comment + operator + token('error', 1)
table_of_tokens = Ct(any_token ^ 0)
end
-- increment [line] by the number of line-ends in [text]
local function sync(line, text)
local index, limit = 1, #text
while index <= limit do
local start, stop = text:find('\r\n', index, true)
if not start then
start, stop = text:find('[\r\n\f]', index)
if not start then break end
end
index = stop + 1
line = line + 1
end
return line
end
lexer.sync = sync
lexer.line = 0
-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, string = true, space = true }
lexer.multiline_tokens = multiline_tokens
function lexer.scan_lua_tokenlist(input)
if not table_of_tokens then
lexer.init()
end
assert(type(input) == 'string', 'bad argument #1 (expected string)')
local line = 1
local tokens = lpeg.match(table_of_tokens, input)
for i, token in pairs(tokens) do
local t = token[1]
if t == 'operator' or t == 'error' then
token[1] = token[2]
end
token[3] = line
if multiline_tokens[t] then
line = sync(line, token[2])
end
end
return tokens
end
--- get a token iterator from a source containing Lua code.
-- Note that this token iterator includes spaces and comments, and does not convert
-- string and number tokens - so e.g. a string token is quoted and a number token is
-- an unconverted string.
-- @param input the source - can be a string or a file-like object (i.e. read() returns line)
-- @param name for the source
function lexer.scan_lua(input,name)
if type(input) ~= 'string' and input.read then
input = input:read('*a')
end
local tokens = lexer.scan_lua_tokenlist(input)
local i, n = 1, #tokens
return function(k)
if k ~= nil then
k = i + k
if k < 1 or k > n then return nil end
return tokens[k]
end
local tok = tokens[i]
i = i + 1
if tok then
lexer.line = tok[3]
lexer.name = name
return tok[1],tok[2]
end
end
end
return lexer