--[[--- A Lua lexical scanner using LPeg.

= CREDITS
Written by Peter Odding, 2007/04/04

= THANKS TO
- the Lua authors for a wonderful language;
- Roberto for LPeg;
- caffeine for keeping me awake :)

= LICENSE
Shamelessly ripped from the SQLite[3] project:

    The author disclaims copyright to this source code. In place of a legal
    notice, here is a blessing:

        May you do good and not evil.
        May you find forgiveness for yourself and forgive others.
        May you share freely, never taking more than you give.

@module macro.lexer
--]]

local lexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cb, Cc, Cg, Cmt, Ct =
    lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Ct

-- create a pattern which captures the Lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end
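
-- For instance, token('number', R'09'^1) matched against '42' produces the
-- table { 'number', '42' }; scan_lua_tokenlist later appends the line number
-- as a third element.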

-- private interface
local table_of_tokens
local extra_tokens

function lexer.add_extra_tokens(extra)
    extra_tokens = extra_tokens or {}
    for _, t in ipairs(extra) do
        table.insert(extra_tokens, t)
    end
    table_of_tokens = nil -- re-initialize
end
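
-- For illustration (hypothetical tokens): a client can register additional
-- multi-character operators before scanning, e.g.
--   lexer.add_extra_tokens{ '!=', '<<' }
-- after which '!=' and '<<' each lex as a single operator token.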

function lexer.init ()
    local digit = R('09')

    -- characters that may start an identifier; after the first character,
    -- digits are valid as well (see [ident] below); the two-byte range
    -- admits UTF-8 letters such as Greek and Cyrillic
    --local idsafe = R('AZ', 'az', '\127\255') + P '_'
    local idsafe = R('AZ', 'az') + P '_' + R '\206\223' * R '\128\255'

    -- operators; longer operators must precede their prefixes in the ordered
    -- choice (P '.' placed first would shadow '..' and '...')
    local OT = P '=='
    if extra_tokens then
        for _, ex in ipairs(extra_tokens) do
            OT = OT + P(ex)
        end
    end
    local operator = token('operator', OT + P '...' + P '..' + P '~=' + P '<=' + P '>='
        + S '+-*/%^#=<>;:,.{}[]()')

    -- identifiers
    local ident = token('iden', idsafe * (idsafe + digit) ^ 0)

    -- keywords
    local keyword = token('keyword', (P 'and' + P 'break' + P 'do' + P 'elseif' +
        P 'else' + P 'end' + P 'false' + P 'for' + P 'function' + P 'if' +
        P 'in' + P 'local' + P 'nil' + P 'not' + P 'or' + P 'repeat' + P 'return' +
        P 'then' + P 'true' + P 'until' + P 'while') * -(idsafe + digit))
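    -- the trailing -(idsafe + digit) guard keeps a keyword from matching the
    -- prefix of a longer name: 'form' lexes as one identifier, not keyword
    -- 'for' followed by 'm'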

    -- numbers
    local number_sign = S'+-'^-1
    local number_decimal = digit ^ 1
    local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
    -- fractional forms; the last alternative also accepts exponent forms
    -- without a decimal point, e.g. 1e5
    local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
        (S'eE' * number_sign * digit^1)^-1 +
        digit^1 * S'eE' * number_sign * digit^1
    local number = token('number', number_hexadecimal +
                                   number_float +
                                   number_decimal)
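    -- e.g. '0xFF', '3.14', '.5', '1e-3' and '42' each lex as a single number
    -- token; a leading sign is left to the operator pattern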

    -- callback for [=[ long strings ]=]
    -- ps. LPeg is for Lua what regex is for Perl, which makes me smile :)
    local equals = P '=' ^ 0
    local open = P '[' * Cg(equals, "init") * P '[' * P '\n' ^ -1
    local close = P ']' * C(equals) * P ']'
    local closeeq = Cmt(close * Cb "init", function (s, i, a, b) return a == b end)
    local longstring = open * C((P(1) - closeeq)^0) * close --/ 1
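    -- how this works: for '[==[ ... ]==]' the open pattern stores '==' in the
    -- group 'init'; Cmt accepts a closing bracket only when its '=' count
    -- matches, so an embedded ']=]' cannot terminate the string early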

    -- strings
    local singlequoted_string = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
    local doublequoted_string = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
    local string = token('string', singlequoted_string +
                                   doublequoted_string +
                                   longstring)

    -- comments
    local singleline_comment = P '--' * (1 - S '\r\n\f') ^ 0
    local multiline_comment = P '--' * longstring
    local comment = token('comment', multiline_comment + singleline_comment)

    -- whitespace
    local whitespace = token('space', S('\r\n\f\t ')^1)

    -- ordered choice of all tokens and last-resort error which consumes one character
    local any_token = whitespace + number + keyword + ident +
                      string + comment + operator + token('error', 1)

    table_of_tokens = Ct(any_token ^ 0)
end

-- increment [line] by the number of line-ends in [text]
local function sync(line, text)
    local index, limit = 1, #text
    while index <= limit do
        local start, stop = text:find('\r\n', index, true)
        if not start then
            start, stop = text:find('[\r\n\f]', index)
            if not start then break end
        end
        index = stop + 1
        line = line + 1
    end
    return line
end
lexer.sync = sync
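-- e.g. sync(1, 'a\r\nb\nc') --> 3 ('\r\n' counts as a single line-end)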

lexer.line = 0

-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, string = true, space = true }
lexer.multiline_tokens = multiline_tokens

function lexer.scan_lua_tokenlist(input)
    if not table_of_tokens then
        lexer.init()
    end
    assert(type(input) == 'string', 'bad argument #1 (expected string)')
    local line = 1
    local tokens = lpeg.match(table_of_tokens, input)
    -- ipairs guarantees the in-order traversal the line counter relies on
    for _, token in ipairs(tokens) do
        local t = token[1]
        if t == 'operator' or t == 'error' then
            token[1] = token[2]
        end
        token[3] = line
        if multiline_tokens[t] then
            line = sync(line, token[2])
        end
    end
    return tokens
end
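
-- For instance, lexer.scan_lua_tokenlist('x = 1') returns
--   { {'iden','x',1}, {'space',' ',1}, {'=','=',1},
--     {'space',' ',1}, {'number','1',1} }
-- (operator and error tokens carry their text in both slots).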

--- get a token iterator from a source containing Lua code.
-- Note that this token iterator includes spaces and comments, and does not
-- convert string and number tokens - a string token stays quoted and a number
-- token is left as the unconverted source text.
-- Calling the iterator with a numeric offset k returns the whole token table
-- at position k relative to the next token (k = 0 peeks) without advancing.
-- @param input the source - either a string or a file-like object (anything
-- with a read method; the entire contents are read at once)
-- @param name a name for the source, stored in lexer.name
function lexer.scan_lua(input, name)
    if type(input) ~= 'string' and input.read then
        input = input:read('*a')
    end
    local tokens = lexer.scan_lua_tokenlist(input)
    local i, n = 1, #tokens
    return function(k)
        if k ~= nil then
            k = i + k
            if k < 1 or k > n then return nil end
            return tokens[k]
        end
        local tok = tokens[i]
        i = i + 1
        if tok then
            lexer.line = tok[3]
            lexer.name = name
            return tok[1], tok[2]
        end
    end
end
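
-- Example usage (a minimal sketch): dump every token of a chunk; tok(0)
-- would peek at the next token table without consuming it.
--   local tok = lexer.scan_lua('local answer = 42', 'demo')
--   for t, v in tok do print(t, v) end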

return lexer