mirror of https://github.com/g012/l65.git
synced 2025-04-08 15:39:24 +00:00

commit 663040a62c (parent a602795911)
Added local labels.

6502.lua (94 lines changed)
@@ -8,13 +8,11 @@ M.__index = M
symbols.__index = symbols
setmetatable(M, symbols)

local location_current -- cache of last location, for faster access
local section_current -- cache of last location's last section, for faster access

M.link = function()
assert(not stats.unused, "can't link twice")
if stats.unused then return end

stats.unused = 0
stats.cycles = 0
for _,location in ipairs(locations) do
local sections = location.sections

@@ -34,14 +32,17 @@ M.link = function()
-- filter sections list
local position_independent_sections = {}
local symbols_to_remove = {}
location.cycles=0
for ix,section in ipairs(sections) do
section:compute_size()
location.cycles = location.cycles + section.cycles
if section.size == 0 then
sections[ix]=nil
if not section.org then table.insert(symbols_to_remove, section.label) end
elseif not section.org then table.insert(position_independent_sections, section) end
end
for _,v in ipairs(symbols_to_remove) do symbols[v] = nil end
stats.cycles = stats.cycles + location.cycles

-- fixed position sections
for section_ix,section in ipairs(sections) do if section.org then
@@ -116,9 +117,23 @@ M.link = function()
end
end

M.resolve = function()
if stats.resolved_count then return end
M.link()

stats.resolved_count = 0
local count = 0
for k,v in pairs(symbols) do
local t = type(v)
if v == 'function' then symbols[k] = v() count=count+1
elseif v == 'table' and type(v.resolve) == 'function' then symbols[k] = v.resolve() count=count+1 end
end
stats.resolved_count = count
end

M.genbin = function(filler)
if not filler then filler = 0xff end
if not stats.unused then M.link() end
M.resolve()
local bin = {}
local ins = table.insert
table.sort(locations, function(a,b) return a.start < b.start end)
@@ -136,8 +151,8 @@ M.genbin = function(filler)
for i=#bin,section.org do ins(bin, filler) end
for _,instruction in ipairs(section.instructions) do
if instruction.bin then for _,b in ipairs(instruction.bin) do ins(bin, b) end
else instruction.asbin(bin) end
M.size=#bin M.cycles=M.cycles+instruction.cycles
elseif instruction.asbin then instruction.asbin(bin) end
M.size=#bin M.cycles=M.cycles+(instruction.cycles or 0)
end
end
if math.type(location.size) == 'integer' then
@@ -172,7 +187,7 @@ end
M.location = function(start, finish)
if type(start) == 'table' then
for _,v in ipairs(locations) do if v == start then
location_current = start
M.location_current = start
return start
end end
error("unable to find reference to location [" .. (start.start or '?') .. ", " .. (start.finish or '?') .. "]")
@@ -180,7 +195,7 @@ M.location = function(start, finish)
local size = (finish or math.huge) - start
local location = { start=start, finish=finish, chunks={ { start=start, size=size } } }
locations[#locations+1] = location
location_current = location
M.location_current = location
return location
end

@@ -193,28 +208,30 @@ M.section = function(t)
section=t section.label=t[1] section[1]=nil
if section.offset and not section.align then error("section " .. section.label .. " has offset, but no align") end
end
table.insert(location_current.sections, section)
table.insert(M.location_current.sections, section)
if symbols[section.label] then error("duplicate symbol: " .. section.label) end
symbols[section.label] = section
section_current = section
M.label_current = section.label
M.section_current = section
section.type = 'section'
section.constraints = {}
section.instructions = {}
function section:compute_size()
local instructions = self.instructions
local size = 0
self.size=0 self.cycles=0
for _,instruction in ipairs(instructions) do
instruction.offset = size
if not instruction.size then
local ins_sz = instruction.size or 0
if type(ins_sz) == 'function' then
-- evaluation is needed to get the size (distinguish zpg/abs)
-- labels and sections are not resolved at this point, so
-- evaluation will fail if the size is not explicitly stated (.b/.w);
-- in that case, assume max size
instruction.bin={} instruction.asbin(instruction.bin)
ins_sz = ins_sz()
end
size = size + instruction.size
self.size = self.size + ins_sz
self.cycles = self.cycles + (instruction.cycles or 0)
end
self.size = size
for _,constraint in ipairs(self.constraints) do
constraint.start = instructions[constraint.from].offset
constraint.finish = instructions[constraint.to].offset
@@ -224,23 +241,35 @@ M.section = function(t)
end

M.label = function(name)
local label = { type='label', label=name }
table.insert(section_current.instructions, label)
local eval,resolve,label,offset
label = { type='label', size=eval, resolve=resolve }
if name:sub(1,1) == '_' then -- local label
name = M.label_current .. name
else
M.label_current = name
end
if symbols[name] then error("duplicate symbol: " .. name) end
symbols[name] = label
eval = function()
offset = M.section_current.size
label.size = 0
return 0
end
resolve = function() return M.section_current.org + offset end
table.insert(M.section_current.instructions, label)
return label
end

M.samepage = function()
local section = section_current
local section = M.section_current
table.insert(section.constraints, { type='samepage', from=#section.instructions+1 })
end
M.crosspage = function()
local section = section_current
local section = M.section_current
table.insert(section.constraints, { type='crosspage', from=#section.instructions+1 })
end
M.endpage = function()
local section = section_current
local section = M.section_current
local constraint = section.constraints[#section.constraints]
assert(constraint and not constraint.finish, "closing constraint, but no constraint is open")
constraint.to = #section.instructions
@@ -303,7 +332,7 @@ M.byte_impl = function(args, nrm)
b[#b+1] = nrm(v)
end
end
table.insert(section_current.instructions, { data=data, size=#data, asbin=asbin })
table.insert(M.section_current.instructions, { data=data, size=#data, asbin=asbin })
end
-- byte(...)
-- Declare bytes to go into the binary stream.
@@ -319,7 +348,7 @@ M.byte = function(...)
end
local byte_encapsulate = function(args)
for k,v in ipairs(args) do
if type(v) == 'table' and v.type == 'section' or v.type == 'label' then
if type(v) == 'table' and (v.type == 'section' or v.type == 'label') then
args[k] = function() return symbols[v.label] end
end
end
@@ -359,7 +388,7 @@ M.word = function(...)
b[#b+1] = v>>8
end
end
table.insert(section_current.instructions, { data=data, size=#data*2, asbin=asbin })
table.insert(M.section_current.instructions, { data=data, size=#data*2, asbin=asbin })
end

local op,cycles_def = function(code, cycles, extra_on_crosspage)
@@ -375,7 +404,7 @@ cycles_def=2 local opimp={
for k,v in pairs(opimp) do
M[k .. 'imp'] = function()
local asbin = function(b) b[#b+1] = v.opc end
table.insert(section_current.instructions, { size=1, cycles=v.cycles, asbin=asbin })
table.insert(M.section_current.instructions, { size=1, cycles=v.cycles, asbin=asbin })
end
end
cycles_def=2 local opimm={
@@ -390,7 +419,7 @@ for k,v in pairs(opimm) do
x = byte_normalize(type(late) == 'function' and late(x) or x+late)
b[#b+1]=v.opc b[#b+1]=x
end
table.insert(section_current.instructions, { size=2, cycles=2, asbin=asbin })
table.insert(M.section_current.instructions, { size=2, cycles=2, asbin=asbin })
end
end
cycles_def=3 local opzpg={
@@ -407,7 +436,7 @@ for k,v in pairs(opzpg) do
x = byte_normalize(type(late) == 'function' and late(x) or x+late)
b[#b+1]=v.opc b[#b+1]=x
end
table.insert(section_current.instructions, { size=2, cycles=v.cycles, asbin=asbin })
table.insert(M.section_current.instructions, { size=2, cycles=v.cycles, asbin=asbin })
end
end
cycles_def=4 local opabs={
@@ -424,7 +453,7 @@ for k,v in pairs(opabs) do
x = word_normalize(type(late) == 'function' and late(x) or x+late)
b[#b+1]=v.opc b[#b+1]=x&0xff b[#b+1]=x>>8
end
table.insert(section_current.instructions, { size=3, cycles=v.cycles, asbin=asbin })
table.insert(M.section_current.instructions, { size=3, cycles=v.cycles, asbin=asbin })
end
M[k .. 'zab'] = function(late, early)
if type(late) ~= 'function' then
@@ -451,13 +480,12 @@ for k,v in pairs(opabs) do
return 3
end
asbin = function(b)
-- TODO force absolute ?
local x = word_normalize(late(early or 0))
local op = opzpg[k]
if x <= 0xff and op then b[#b+1]=op.opc b[#b+1]=x
else b[#b+1]=v.opc b[#b+1]=x&0xff b[#b+1]=x>>8 end
-- since we assumed absolute on link phase, we must generate absolute in binary
if x <= 0xff and opzpg[k] then print("warning: forcing abs on zpg operand for opcode " .. k) end
b[#b+1]=v.opc b[#b+1]=x&0xff b[#b+1]=x>>8
end
table.insert(section_current.instructions, ins)
table.insert(M.section_current.instructions, ins)
end
end
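The heart of this commit is the M.label rewrite above. A minimal usage sketch, assuming 6502.lua is required as an ordinary Lua module (the location range and the section/label names below are illustrative, not from the commit): a name starting with '_' is registered as M.label_current .. name, so the same local name can be reused under different global labels or sections without a duplicate-symbol error.

local M = require '6502'  -- assumption: module loaded under this name
M.location(0xf000, 0xffff)
M.section{ 'main' }  -- sets M.label_current = 'main'
M.label('loop')      -- global label: symbols['loop'], M.label_current = 'loop'
M.label('_skip')     -- local label: registered as symbols['loop_skip']
M.section{ 'irq' }   -- M.label_current = 'irq'
M.label('_skip')     -- no clash: registered as symbols['irq_skip']

Each label is also pushed into M.section_current.instructions as a zero-size entry whose resolve() returns the section's org plus the label's recorded offset once sections have been placed.
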
asm.l65 (4 lines changed)

@@ -53,7 +53,7 @@ ptr_table("ptrs", message, data, 0)
ldx #15,3

local kernel_cycles,kernel_size
hook(function() kernel_cycles=cycles kernel_size=size end)
table.insert(section_current.instructions, { asbin=function() kernel_cycles=cycles kernel_size=size end })

lda data
lda data,5
@@ -71,7 +71,7 @@ ptr_table("ptrs", message, data, 0)
jmp (INTIM-4)

-- cycles are counted without taking any branch
hook(function() print('kernel cycles: ', cycles-kernel_cycles, 'kernel size: ', size-kernel_size) end)
table.insert(section_current.instructions, { asbin=function() print('kernel cycles: ', cycles-kernel_cycles, 'kernel size: ', size-kernel_size) end })

lda function(c) return data * c end, v
lda \c(data*c), v
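In the sample above, the old hook() calls are replaced by pushing bare { asbin = ... } entries into the current section. This leans on the M.genbin branch added in 6502.lua: an entry with no bin and no size contributes nothing to layout or cycle totals, but its asbin closure still runs, in order, while the binary is generated, so it can sample the running cycles and size counters. A hypothetical helper (not in the commit) makes the pattern explicit, assuming cycles and size are in scope as in asm.l65:

-- sketch only: wraps the pattern used twice in asm.l65
local function at_genbin(f)
    table.insert(section_current.instructions, { asbin = f })
end

local kernel_cycles, kernel_size
at_genbin(function() kernel_cycles = cycles kernel_size = size end)
-- ...kernel instructions...
at_genbin(function() print('kernel cycles:', cycles - kernel_cycles, 'kernel size:', size - kernel_size) end)
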
@@ -1,24 +0,0 @@
--[[
6502 assembler macros.
]]

local M = require 'macro'

local val = function(s)
end

local op_imm = {}

local op = {
lda = function(get,put)
get:expecting 'space'
local t,v = get:next()
if t ~= '#' then return nil,true end
t,v = get:block(nil, {['\n']=true,['--']=true})
return put 'lda.imm(' :tokenlist t ')' :token v
--v = get:upto '\n' --'[;\n(--)]'
--return ('lda.imm(%s)\n'):format(v)
end,
}

for k,v in pairs(op) do M.define(k,v) end
macro/Getter.lua (320 lines changed)

@@ -1,320 +0,0 @@
--- Getter class. Used to get values from the token stream. The first
-- argument `get` of a macro substitution function is of this type.
--
-- M.define ('\\',function(get,put)
-- local args, body = get:idens('('), get:list()
-- return put:keyword 'function' '(' : idens(args) ')' :
-- keyword 'return' : list(body) : space() : keyword 'end'
-- end)
--
-- The second argument `put` is a `TokenList` object.
-- @see macro.TokenList
-- @module macro.Getter

local TokenList = require 'macro.TokenList'
local append = table.insert
local setmetatable = setmetatable

local Getter = {
__call = function(self)
return self.fun()
end
}
local M = Getter

Getter.__index = Getter;

local scan_iter

function Getter.new (get)
return setmetatable({fun=get},Getter)
end

function Getter.from_tl(tl)
return Getter.new(scan_iter(tl))
end

local Tok = {
__tostring = function(self)
return self[2]
end
}

local function tok_new (t)
return setmetatable(t,Tok)
end

-- create a token iterator out of a token list
function Getter.scan_iter (tlist)
local i,n = 1,#tlist
return function(k)
if k ~= nil then
k = i + k
if k < 1 or k > n then return nil end
return tlist[k]
end
local tv = tlist[i]
if tv == nil then return nil end
i = i + 1
return tv[1],tv[2]
end
end

scan_iter = Getter.scan_iter

--- get the next non-whitespace token.
-- @return token type
-- @return token value
-- @function Getter.next
function Getter.next(get)
local t,v = get()
while t == 'space' or t == 'comment' do
t,v = get()
end
return t,v
end

local TL,LTL = TokenList.new, TokenList.new_list


local function tappend (tl,t,val)
val = val or t
append(tl,{t,val})
end

--- get a balanced block.
-- Typically for grabbing up to an `end` keyword including any nested
-- `if`, `do` or `function` blocks with their own `end` keywords.
-- @param tok the token stream
-- @param begintokens set of tokens requiring their own nested *endtokens*
-- (default: `{['do']=true,['function']=true,['if']=true}`)
-- @param endtokens set of tokens ending a block (default:`{['end']=true}`)
-- @return list of tokens
-- @return block end token in form `{type,value}`
-- @usage
-- -- copy a balanced table constructor
-- get:expecting '{'
-- put '{':tokens (get:block ({['{']=true}, {['}']=true}) '}')
function Getter.block(tok,begintokens,endtokens)
begintokens = begintokens or {['do']=true,['function']=true,['if']=true}
endtokens = endtokens or {['end']=true}
local level = 1 -- used to count expected matching `endtokens`
local tl = TL()
local token,value
repeat
token,value = tok()
if not token then return nil,'unexpected end of block' end
if begintokens[value] then
level = level + 1
elseif endtokens[value] then
level = level - 1
end
if level > 0 then -- final end token is returned separately
tappend(tl,token,value)
end
until level == 0
return tl,tok_new{token,value}
end

--- get a delimited list of token lists.
-- Typically used for grabbing argument lists like ('hello',a+1,fred(c,d)); will count parens
-- so that the delimiter (usually a comma) is ignored inside sub-expressions. You must have
-- already read the start token of the list, e.g. open parentheses. It will eat the end token
-- and return the list of TLs, plus the end token. Based on similar code in Penlight's
-- `pl.lexer` module.
-- @param tok the token stream
-- @param endt the end token (default ')')
-- @param delim the delimiter (default ',')
-- @return list of token lists
-- @return end token in form {type,value}
function Getter.list(tok,endtoken,delim)
endtoken = endtoken or ')'
delim = delim or ','
local parm_values = LTL()
local level = 1 -- used to count ( and )
local tl = TL()
local is_end
if type(endtoken) == 'function' then
is_end = endtoken
elseif endtoken == '\n' then
is_end = function(t,val)
return t == 'space' and val:find '\n'
end
else
is_end = function (t)
return t == endtoken
end
end
local token,value = tok()
if is_end(token,value) then return parm_values end
if token == 'space' then
token,value = tok()
end
while true do
if not token then return nil,'unexpected end of list' end -- end of stream is an error!
if is_end(token,value) and level == 1 then
append(parm_values,tl)
break
elseif token == '(' then
level = level + 1
tappend(tl,'(')
elseif token == ')' then
level = level - 1
if level == 0 then -- finished with parm list
append(parm_values,tl)
break
else
tappend(tl,')')
end
elseif token == '{' then
level = level + 1
tappend(tl,'{')
elseif token == '}' then
level = level - 1
tappend(tl,'}')
elseif token == delim and level == 1 then
append(parm_values,tl) -- a new parm
tl = TL()
else
tappend(tl,token,value)
end
token,value=tok()
end
return parm_values,tok_new{token,value}
end

function Getter.upto_keywords (k1,k2)
return function(t,v)
return t == 'keyword' and (v == k1 or v == k2)
end,''
end

local tnext = Getter.next


function Getter.upto(tok,k1,k2)
local endt = k1
if type(k1) == 'string' and k1:match '^%a+$' then
endt = Getter.upto_keywords(k1,k2)
end
local ltl,tok = tok:list(endt,'')
M.assert(ltl ~= nil and #ltl > 0,'failed to grab tokens')
return ltl[1],tok
end

function Getter.line(tok)
return tok:upto(function(t,v)
return (t=='space' and v:match '\n') or t == 'comment'
end)
end


local function prettyprint (t, v)
v = v:gsub ("\n", "\\n")
if t == "string" then
if #v > 16 then v = v:sub(1,12).."..."..v:sub(1,1) end
return t.." "..v
end
if #v > 16 then v = v:sub(1,12).."..." end
if t == "space" or t == "comment" or t == "keyword" then
return t.." '"..v.."'"
elseif t == v then
return "'"..v.."'"
else
return t.." "..v
end
end

--- get the next identifier token.
-- (will be an error if the token has wrong type)
-- @return identifier name
function Getter.iden(tok)
local t,v = tnext(tok)
M.assert(t == 'iden','expecting identifier, got '..prettyprint(t,v))
return v
end

Getter.name = Getter.iden -- backwards compatibility!

--- get the next number token.
-- (will be an error if the token has wrong type)
-- @return converted number
function Getter.number(tok)
local t,v = tnext(tok)
M.assert(t == 'number','expecting number, got '..prettyprint(t,v))
return tonumber(v)
end

--- get a delimited list of identifiers.
-- works like list.
-- @param tok the token stream
-- @param endt the end token (default ')')
-- @param delim the delimiter (default ',')
-- @see list
function Getter.idens(tok,endt,delim)
local ltl,err = tok:list(endt,delim)
if not ltl then error('idens: '..err) end
local names = {}
-- list() will return {{}} for an empty list of tlists
for i = 1,#ltl do
local tl = ltl[i]
local tv = tl[1]
if tv then
if tv[1] == 'space' then tv = tl[2] end
names[i] = tv[2]
end
end
return names, err
end

Getter.names = Getter.idens -- backwards compatibility!

--- get the next string token.
-- (will be an error if the token has wrong type)
-- @return string value (without quotes)
function Getter.string(tok)
local t,v = tok:expecting("string")
return v:sub(2,-2)
end

--- assert that the next token has the given type. This will throw an
-- error if the next non-whitespace token does not match.
-- @param type a token type ('iden','string',etc)
-- @param value a token value (optional)
-- @usage get:expecting '('
-- @usage get:expecting ('iden','bonzo')
function Getter.expecting (tok,type,value)
local t,v = tnext(tok)
if t ~= type then M.error ("expected "..type.." got "..prettyprint(t,v)) end
if value then
if v ~= value then M.error("expected "..value.." got "..prettyprint(t,v)) end
end
return t,v
end

--- peek ahead or before in the token stream.
-- @param k positive delta for looking ahead, negative for looking behind.
-- @param dont_skip true if you want to check for whitespace
-- @return the token type
-- @return the token value
-- @return the token offset
-- @function Getter.peek

--- peek ahead two tokens.
-- @return first token type
-- @return first token value
-- @return second token type
-- @return second token value
-- @function Getter.peek2

--- patch the token stream at the end.
-- @param idx index in output table
-- @param text to replace value at that index
-- @function Getter.patch

--- put out a placeholder for later patching.
-- @param put a putter object
-- @return an index into the output table
-- @function Getter.placeholder

return Getter
@@ -1,201 +0,0 @@
---------------
-- A TokenList class for generating token lists.
--
-- There are also useful `get_` methods for extracting values from
-- the first token.
--
-- @module macro.TokenList

local TokenList = {}
local M = TokenList
TokenList.__index = TokenList

local append = table.insert

function TokenList.new (tl)
return setmetatable(tl or {},TokenList)
end

local TokenListList = {}

function TokenList.new_list (ltl)
return setmetatable(ltl or {},TokenListList)
end

TokenListList.__index = function(self,key)
local m = TokenList[key]
return function(self,...)
local res = {}
for i = 1,#self do res[i] = m(self[i],...) end
return TokenList.new_list(res)
end
end

-- token-getting helpers


local function extract (tl)
local tk = tl[1]
if tk[1] == 'space' then
tk = tl[2]
end
return tk
end

--- get an identifier from front of a token list.
-- @return identifier name
function TokenList.get_iden (tl)
local tk = extract(tl)
M.assert(tk[1]=='iden','expecting identifier')
return tk[2]
end

--- get an number from front of a token list.
-- @return number
function TokenList.get_number(tl)
local tk = extract(tl)
M.assert(tk[1]=='number','expecting number')
return tonumber(tk[2])
end

--- get a string from front of a token list.
-- @return string value (without quotes)
function TokenList.get_string(tl)
local tk = extract(tl)
M.assert(tk[1]=='string')
return tk[2]:sub(2,-2) -- watch out! what about long string literals??
end

--- takes a token list and strips spaces and comments.
-- @return new tokenlist
function TokenList.strip_spaces (tl)
local out = TokenList.new()
for _,t in ipairs(tl) do
if t[1] ~= 'comment' and t[1] ~= 'space' then
append(out,t)
end
end
return out
end

--- pick the n-th token from this tokenlist.
-- Note that it returns the value and type, not the type and value.
-- @param n (1 to #self)
-- @return token value
-- @return token type
function TokenList.pick (tl,n)
local t = tl[n]
return t[2],t[1]
end

-- token-putting helpers
local comma,space = {',',','},{'space',' '}

--- append an identifier.
-- @param name the identifier
-- @param no_space true if you don't want a space after the iden
-- @return self
function TokenList.iden(res,name,no_space)
append(res,{'iden',name})
if not no_space then
append(res,space)
end
return res
end

TokenList.name = TokenList.iden -- backwards compatibility!

--- append a string.
-- @param s the string
-- @return self
function TokenList.string(res,s)
append(res,{'string','"'..s..'"'})
return res
end

--- append a number.
-- @param val the number
-- @return self
function TokenList.number(res,val)
append(res,{'number',val})
return res
end

--- put out a list of identifiers, separated by commas.
-- @param res output token list
-- @param names a list of identifiers
-- @return self
function TokenList.idens(res,names)
for i = 1,#names do
res:iden(names[i],true)
if i ~= #names then append(res,comma) end
end
return res
end

TokenList.names = TokenList.idens -- backwards compatibility!

--- put out a token list.
-- @param res output token list
-- @param tl a token list
-- @return self
function TokenList.tokens(res,tl)
for j = 1,#tl do
append(res,tl[j])
end
return res
end

--- put out a list of token lists, separated by commas.
-- @param res output token list
-- @param ltl a list of token lists
-- @return self
function TokenList.list(res,ltl)
for i = 1,#ltl do
res:tokens(ltl[i])
if i ~= #ltl then append(res,comma) end
end
return res
end

--- put out a space token.
-- @param res output token list
-- @param space a string containing only whitespace (default ' ')
-- @return self
function TokenList.space(res,space)
append(res,{'space',space or ' '})
return res
end

--- put out a keyword token.
-- @param res output token list
-- @param keyw a Lua keyword
-- @param no_space true if you don't want a space after the iden
-- @return self
function TokenList.keyword(res,keyw,no_space)
append(res,{'keyword',keyw})
if not no_space then
append(res,space)
end
return res
end

--- convert this tokenlist into a string.
function TokenList.__tostring(tl)
local res = {}
for j = 1,#tl do
append(res,tl[j][2])
end
return table.concat(res)
end

--- put out a operator token. This is the overloaded call operator
-- for token lists.
-- @param res output token list
-- @param keyw an operator string
function TokenList.__call(res,t,v)
append(res,{t,v or t})
return res
end

return TokenList
@@ -1,161 +0,0 @@
-------
-- LuaMacro built-in macros.
-- @module macro.builtin

local M = require 'macro'

local function macro_def (scoped)
return function (get)
local t,name,parms,openp
local t,name = get:next()
local upto,ret
if t == '(' then
t,name = get:next()
upto = function(t,v) return t == ')' end
else
upto = function(t,v)
return t == 'space' and v:find '\n'
end
-- return space following (returned by copy_tokens)
ret = true
end
-- might be immediately followed by a parm list
t,openp = get()
if openp == '(' then
parms = get:names()
end
-- the actual substitution is up to the end of the line
local args, space = M.copy_tokens(get,upto)
if scoped then
M.define_scoped(name,args,parms)
else
M.set_macro(name,args,parms)
end
return ret and space[2]
end
end

--- a macro for defining lexically scoped simple macros.
-- def_ may be followed by an arglist, and the substitution is the
-- rest of the line.
-- @usage def_ block (function() _END_CLOSE_
-- @usage def_ sqr(x) ((x)*(x))
-- @macro def_
M.define ('def_',macro_def(true))

--- a global version of `def_`.
-- @see def_
-- @macro define_
M.define ('define_',macro_def(false))

--- set the value of an existing macro.
-- the name and value follows immediately, and the value must be
-- a single token
-- @usage set_ T 'string'
-- @usage set_ F function
-- @macro set_
M.define('set_',function(get)
local name = get:name()
local t,v = get:next()
M.set_macro(name,{{t,v}})
end)

--- undefining identifier macros.
-- @macro undef_
M.define('undef_',function(get)
M.set_macro(get:name())
end)

--- Insert text after current block end. `_END_` is followed by a quoted string
-- and is used to insert that string after the current block closes.
-- @macro _END_
M.define ('_END_',function(get)
local str = get:string()
M.block_handler(-1,function(get,word)
if word ~= 'end' then return nil,true end
return str
end)
end)

--- insert an end after the next closing block.
-- @macro _END_END_
-- @see _END_
M.define '_END_END_ _END_ " end"'

--- insert a closing parens after next closing block.
-- @usage def_ begin (function() _END_CLOSE_
-- fun begin ... end --> fun (function() ... end)
-- @macro _END_CLOSE_
-- @see _END_
M.define '_END_CLOSE_ _END_ ")"'

--- 'stringizing' macro.
-- Will convert its argument into a string.
-- @usage def_ _assert(x) assert(x,_STR_(x))
-- @macro _STR_
M.define('_STR_(x)',function(x)
x = tostring(x)
local put = M.Putter()
return put '"':name(x) '"'
end)

-- macro stack manipulation


--- push a value onto a given macro' stack.
-- @macro _PUSH_
-- @param mac existing macro name
-- @param V a string
M.define('_PUSH_(mac,V)',function(mac,V)
M.push_macro_stack(mac:get_string(),V:get_string())
end)

--- pop a value from a macro's stack.
-- @macro _POP_
-- @param mac existing macro name
-- @return a string
-- @see _PUSH_
M.define('_POP_',function(get,put)
local val = M.pop_macro_stack(get:string())
if val then
return put(val)
end
end)

--- drop the top of a macro's stack.
-- Like `_POP_`, except that it does not return the value
-- @macro _DROP_
-- @return existing macro name
-- @see _POP_
M.define('_DROP_',function(get)
M.pop_macro_stack(get:string())
end)

--- Load a Lua module immediately. This allows macro definitions to
-- to be loaded before the rest of the file is parsed.
-- If the module returns a function, then this is assumed to be a
-- substitution function, allowing macro modules to insert code
-- at this point.
-- @macro require_
M.define('require_',function(get,put)
local name = get:string()
local ok,fn = pcall(require,name)
if not ok then
fn = require('macro.'..name)
end
if type(fn) == 'function' then
return fn(get,put)
end
end)

--- Include the contents of a file. This inserts the file directly
-- into the token stream, and is equivalent to cpp's `#include` directive.
-- @macro include_
M.define('include_',function(get)
local str = get:string()
local f = M.assert(io.open(str))
local txt = f:read '*a'
f:close()
M.push_substitution(txt)
end)
macro/clexer.lua (169 lines changed)

@@ -1,169 +0,0 @@
--[[--- A C lexical scanner using LPeg.
= CREDITS
= based on the C lexer in Peter Odding's lua-lxsh
@module macro.clexer
--]]

local clexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cc, Ct = lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cc, lpeg.Ct

-- create a pattern which captures the lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end

-- private interface
local table_of_tokens
local extra_tokens

function clexer.add_extra_tokens(extra)
extra_tokens = extra_tokens or {}
for _,t in ipairs(extra) do
table.insert(extra_tokens,t)
end
table_of_tokens = nil -- re-initialize
end

function clexer.init ()
local digit = R('09')

local upp, low = R'AZ', R'az'
local oct, dec = R'07', R'09'
local hex = dec + R'AF' + R'af'
local letter = upp + low
local alnum = letter + dec + '_'
local endline = S'\r\n\f'
local newline = '\r\n' + endline
local escape = '\\' * ( newline
+ S'\\"\'?abfnrtv'
+ (#oct * oct^-3)
+ ('x' * #hex * hex^-2))


-- range of valid characters after first character of identifier
local idsafe = R('AZ', 'az', '\127\255') + P '_'

-- operators
local OT = P '=='
if extra_tokens then
for _,ex in ipairs(extra_tokens) do
OT = OT + P(ex)
end
end
local operator = token('operator', OT + P '.' + P'>>=' + '<<=' + '--' + '>>' + '>=' + '/=' + '==' + '<='
+ '+=' + '<<' + '*=' + '++' + '&&' + '|=' + '||' + '!=' + '&=' + '-='
+ '^=' + '%=' + '->' + S',)*%+&(-~/^]{}|.[>!?:=<;')
-- identifiers
local ident = token('iden', idsafe * (idsafe + digit) ^ 0)

-- keywords
local keyword = token('keyword', (P 'auto' + P 'break' + P 'case' + P'char' +
P 'const' + P 'continue' + P 'default' +
P 'do' + P 'double' + P 'else' + P 'enum' + P 'extern' + P 'float' +
P 'for' + P 'goto' + P 'if' + P 'int' + P 'long' + P 'register' +
P 'return' + P 'short' + P 'signed' + P 'sizeof' + P 'static' +
P 'struct' + P 'switch' + P 'typedef' + P 'union' + P 'void' +
P 'volatile' + P 'while') * -(idsafe + digit))

-- numbers
local number_sign = S'+-'^-1
local number_decimal = digit ^ 1
local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
(S'eE' * number_sign * digit^1)^-1
local number = token('number', number_hexadecimal +
number_float +
number_decimal)


local string = token('string', '"' * ((1 - S'\\\r\n\f"') + escape)^0 * '"')
local char = token('char',"'" * ((1 - S"\\\r\n\f'") + escape) * "'")

-- comments
local singleline_comment = P '//' * (1 - S '\r\n\f') ^ 0
local multiline_comment = '/*' * (1 - P'*/')^0 * '*/'
local comment = token('comment', multiline_comment + singleline_comment)
local prepro = token('prepro',P '#' * (1 - S '\r\n\f') ^ 0)

-- whitespace
local whitespace = token('space', S('\r\n\f\t ')^1)

-- ordered choice of all tokens and last-resort error which consumes one character
local any_token = whitespace + number + keyword + ident +
string + char + comment + prepro + operator + token('error', 1)


table_of_tokens = Ct(any_token ^ 0)
end

-- increment [line] by the number of line-ends in [text]
local function sync(line, text)
local index, limit = 1, #text
while index <= limit do
local start, stop = text:find('\r\n', index, true)
if not start then
start, stop = text:find('[\r\n\f]', index)
if not start then break end
end
index = stop + 1
line = line + 1
end
return line
end
clexer.sync = sync

clexer.line = 0

-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, space = true }
clexer.multiline_tokens = multiline_tokens

function clexer.scan_c_tokenlist(input)
if not table_of_tokens then
clexer.init()
end
assert(type(input) == 'string', 'bad argument #1 (expected string)')
local line = 1
local tokens = lpeg.match(table_of_tokens, input)
for i, token in pairs(tokens) do
local t = token[1]
if t == 'operator' or t == 'error' then
token[1] = token[2]
end
token[3] = line
if multiline_tokens[t] then
line = sync(line, token[2])
end
end
return tokens
end

--- get a token iterator from a source containing Lua code.
-- S is the source - can be a string or a file-like object (i.e. read() returns line)
-- Note that this token iterator includes spaces and comments, and does not convert
-- string and number tokens - so e.g. a string token is quoted and a number token is
-- an unconverted string.
function clexer.scan_c(input,name)
if type(input) ~= 'string' and input.read then
input = input:read('*a')
end
local tokens = clexer.scan_c_tokenlist(input)
local i, n = 1, #tokens
return function(k)
if k ~= nil then
k = i + k
if k < 1 or k > n then return nil end
return tokens[k]
end
local tok = tokens[i]
i = i + 1
if tok then
clexer.line = tok[3]
clexer.name = name
return tok[1],tok[2]
end
end

end

return clexer
macro/lexer.lua (179 lines changed)

@@ -1,179 +0,0 @@
--[[--- A Lua lexical scanner using LPeg.
= CREDITS
Written by Peter Odding, 2007/04/04

= THANKS TO
- the Lua authors for a wonderful language;
- Roberto for LPeg;
- caffeine for keeping me awake :)

= LICENSE
Shamelessly ripped from the SQLite[3] project:

The author disclaims copyright to this source code. In place of a legal
notice, here is a blessing:

May you do good and not evil.
May you find forgiveness for yourself and forgive others.
May you share freely, never taking more than you give.

@module macro.lexer
--]]

local lexer = {}
local lpeg = require 'lpeg'
local P, R, S, C, Cb, Cc, Cg, Cmt, Ct =
lpeg.P, lpeg.R, lpeg.S, lpeg.C, lpeg.Cb, lpeg.Cc, lpeg.Cg, lpeg.Cmt, lpeg.Ct

-- create a pattern which captures the lua value [id] and the input matching
-- [patt] in a table
local function token(id, patt) return Ct(Cc(id) * C(patt)) end

-- private interface
local table_of_tokens
local extra_tokens

function lexer.add_extra_tokens(extra)
extra_tokens = extra_tokens or {}
for _,t in ipairs(extra) do
table.insert(extra_tokens,t)
end
table_of_tokens = nil -- re-initialize
end

function lexer.init ()
local digit = R('09')

-- range of valid characters after first character of identifier
--local idsafe = R('AZ', 'az', '\127\255') + P '_'
local idsafe = R('AZ', 'az') + P '_' + R '\206\223' * R '\128\255'
-- operators
local OT = P '=='
if extra_tokens then
for _,ex in ipairs(extra_tokens) do
OT = OT + P(ex)
end
end
local operator = token('operator', OT + P '.' + P '~=' + P '<=' + P '>=' + P '...'
+ P '..' + S '+-*/%^#=<>;:,.{}[]()')
-- identifiers
local ident = token('iden', idsafe * (idsafe + digit) ^ 0)

-- keywords
local keyword = token('keyword', (P 'and' + P 'break' + P 'do' + P 'elseif' +
P 'else' + P 'end' + P 'false' + P 'for' + P 'function' + P 'if' +
P 'in' + P 'local' + P 'nil' + P 'not' + P 'or' + P 'repeat' + P 'return' +
P 'then' + P 'true' + P 'until' + P 'while') * -(idsafe + digit))

-- numbers
local number_sign = S'+-'^-1
local number_decimal = digit ^ 1
local number_hexadecimal = P '0' * S 'xX' * R('09', 'AF', 'af') ^ 1
local number_float = (digit^1 * P'.' * digit^0 + P'.' * digit^1) *
(S'eE' * number_sign * digit^1)^-1
local number = token('number', number_hexadecimal +
number_float +
number_decimal)

-- callback for [=[ long strings ]=]
-- ps. LPeg is for Lua what regex is for Perl, which makes me smile :)
local equals = P '=' ^ 0
local open = P '[' * Cg(equals, "init") * P '[' * P '\n' ^ -1
local close = P ']' * C(equals) * P ']'
local closeeq = Cmt(close * Cb "init", function (s, i, a, b) return a == b end)
local longstring = open * C((P(1) - closeeq)^0) * close --/ 1

-- strings
local singlequoted_string = P "'" * ((1 - S "'\r\n\f\\") + (P '\\' * 1)) ^ 0 * "'"
local doublequoted_string = P '"' * ((1 - S '"\r\n\f\\') + (P '\\' * 1)) ^ 0 * '"'
local string = token('string', singlequoted_string +
doublequoted_string +
longstring)

-- comments
local singleline_comment = P '--' * (1 - S '\r\n\f') ^ 0
local multiline_comment = P '--' * longstring
local comment = token('comment', multiline_comment + singleline_comment)

-- whitespace
local whitespace = token('space', S('\r\n\f\t ')^1)

-- ordered choice of all tokens and last-resort error which consumes one character
local any_token = whitespace + number + keyword + ident +
string + comment + operator + token('error', 1)


table_of_tokens = Ct(any_token ^ 0)
end

-- increment [line] by the number of line-ends in [text]
local function sync(line, text)
local index, limit = 1, #text
while index <= limit do
local start, stop = text:find('\r\n', index, true)
if not start then
start, stop = text:find('[\r\n\f]', index)
if not start then break end
end
index = stop + 1
line = line + 1
end
return line
end
lexer.sync = sync

lexer.line = 0

-- we only need to synchronize the line-counter for these token types
local multiline_tokens = { comment = true, string = true, space = true }
lexer.multiline_tokens = multiline_tokens

function lexer.scan_lua_tokenlist(input)
if not table_of_tokens then
lexer.init()
end
assert(type(input) == 'string', 'bad argument #1 (expected string)')
local line = 1
local tokens = lpeg.match(table_of_tokens, input)
for i, token in pairs(tokens) do
local t = token[1]
if t == 'operator' or t == 'error' then
token[1] = token[2]
end
token[3] = line
if multiline_tokens[t] then
line = sync(line, token[2])
end
end
return tokens
end

--- get a token iterator from a source containing Lua code.
-- Note that this token iterator includes spaces and comments, and does not convert
-- string and number tokens - so e.g. a string token is quoted and a number token is
-- an unconverted string.
-- @param input the source - can be a string or a file-like object (i.e. read() returns line)
-- @param name for the source
function lexer.scan_lua(input,name)
if type(input) ~= 'string' and input.read then
input = input:read('*a')
end
local tokens = lexer.scan_lua_tokenlist(input)
local i, n = 1, #tokens
return function(k)
if k ~= nil then
k = i + k
if k < 1 or k > n then return nil end
return tokens[k]
end
local tok = tokens[i]
i = i + 1
if tok then
lexer.line = tok[3]
lexer.name = name
return tok[1],tok[2]
end
end
end

return lexer