1
0
mirror of https://github.com/catseye/SixtyPical.git synced 2024-06-27 07:29:34 +00:00
SixtyPical/src/sixtypical/parser.py
2015-10-18 23:15:40 +01:00

267 lines
9.1 KiB
Python

# encoding: UTF-8
import re
from sixtypical.ast import Program, Defn, Routine, Block, Instr
from sixtypical.model import (
TYPE_BIT, TYPE_BYTE, TYPE_BYTE_TABLE, TYPE_ROUTINE, TYPE_VECTOR,
LocationRef, ConstantRef
)
class Scanner(object):
    """Hand-written tokenizer that walks through a source string.

    After construction, and after every call to `scan()`, the current
    token is available as `self.token` (a string, or None at end of
    input) and its category as `self.type`.  The categories produced by
    `scan()` are 'operator', 'integer literal', 'string literal',
    'identifier', 'unknown character' and 'EOF'.  `self.text` always
    holds the not-yet-consumed remainder of the input.
    """

    def __init__(self, text):
        """Start scanning `text` and load the first token."""
        self.text = text
        self.token = None
        self.type = None
        self.scan()

    def scan_pattern(self, pattern, type, token_group=1, rest_group=2):
        """Try to consume `pattern` from the front of the remaining text.

        `pattern` is wrapped as group 1 and followed by a lazy "rest of
        input" group, so a caller whose pattern contains its own capture
        group must shift `token_group` / `rest_group` accordingly.

        On a match, `self.type` is set to `type`, `self.token` to the
        text of `token_group`, `self.text` to the text of `rest_group`,
        and True is returned.  Otherwise nothing changes and False is
        returned.
        """
        # NOTE: without re.MULTILINE, `$` also matches just before a final
        # newline, so a single trailing newline may be left out of the
        # captured remainder.  It would only have been skipped as
        # whitespace on the next scan.
        pattern = r'^(' + pattern + r')(.*?)$'
        match = re.match(pattern, self.text, re.DOTALL)
        if not match:
            return False
        self.type = type
        self.token = match.group(token_group)
        self.text = match.group(rest_group)
        return True

    def scan(self):
        """Advance to the next token, skipping whitespace and comments.

        Sets `self.type` to 'EOF' and `self.token` to None once the input
        is exhausted.  Integer literals written in hexadecimal (`$ff`)
        are normalised to their decimal string form.
        """
        self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        # A comment runs from `//` up to (not including) the line
        # terminator; the terminator is eaten by the whitespace skip.
        # Not requiring the terminator lets a comment end the file.
        while self.scan_pattern(r'\/\/[^\n\r]*', 'comment'):
            self.scan_pattern(r'[ \t\n\r]*', 'whitespace')
        if not self.text:
            self.token = None
            self.type = 'EOF'
            return
        if self.scan_pattern(r'\,|\@|\+|\{|\}', 'operator'):
            return
        if self.scan_pattern(r'\d+', 'integer literal'):
            return
        if self.scan_pattern(r'\$([0-9a-fA-F]+)', 'integer literal',
                             token_group=2, rest_group=3):
            # Store the value in decimal so consumers can use int(token).
            self.token = str(int(self.token, 16))
            return
        if self.scan_pattern(r'\"(.*?)\"', 'string literal',
                             token_group=2, rest_group=3):
            return
        if self.scan_pattern(r'\w+', 'identifier'):
            return
        if self.scan_pattern(r'.', 'unknown character'):
            return
        else:
            raise AssertionError("this should never happen, self.text=(%s)" % self.text)

    def expect(self, token):
        """Consume the current token if it equals `token`.

        Raises SyntaxError if the current token is anything else.
        """
        if self.token == token:
            self.scan()
        else:
            raise SyntaxError("Expected '%s', but found '%s'" %
                              (token, self.token))

    def on(self, token):
        """Return True if the current token equals `token` (no advance)."""
        return self.token == token

    def on_type(self, type):
        """Return True if the current token's category equals `type`."""
        return self.type == type

    def check_type(self, type):
        """Raise SyntaxError unless the current token's category is `type`.

        The token is not consumed.
        """
        if self.type != type:
            raise SyntaxError("Expected %s, but found %s ('%s')" %
                              (type, self.type, self.token))

    def consume(self, token):
        """Consume the current token if it equals `token`.

        Returns True when the token was consumed, False (without
        advancing) otherwise.
        """
        if self.token == token:
            self.scan()
            return True
        else:
            return False
class SymEntry(object):
    """Symbol-table record pairing an AST node with its model object.

    `ast_node` is whatever node declared the symbol (it may be None),
    and `model` is the reference object handed back on lookup.
    """

    def __init__(self, ast_node, model):
        self.ast_node, self.model = ast_node, model
class Parser(object):
    """Recursive-descent parser producing a `Program` AST from source text.

    Each grammar production is a method that consumes tokens from
    `self.scanner` and returns the corresponding AST node.  Declared
    names are recorded in `self.symbols` (name -> SymEntry) so that
    location expressions can be resolved while parsing.
    """

    def __init__(self, text):
        """Create a scanner over `text` and pre-declare built-in names.

        'a', 'x' and 'y' are pre-declared as byte locations and 'c', 'z',
        'n' and 'v' as bit locations (presumably the machine's registers
        and status flags).
        """
        self.scanner = Scanner(text)
        self.symbols = {}  # token -> SymEntry
        for token in ('a', 'x', 'y'):
            self.symbols[token] = SymEntry(None, LocationRef(TYPE_BYTE, token))
        for token in ('c', 'z', 'n', 'v'):
            self.symbols[token] = SymEntry(None, LocationRef(TYPE_BIT, token))

    def lookup(self, name):
        """Return the model object for `name`.

        Raises SyntaxError if `name` has not been declared.
        """
        if name not in self.symbols:
            raise SyntaxError('Undefined symbol "%s"' % name)
        return self.symbols[name].model

    def _declare(self, name, ast_node, type_):
        """Record `name` in the symbol table; SyntaxError if already there."""
        if name in self.symbols:
            raise SyntaxError('Symbol "%s" already declared' % name)
        self.symbols[name] = SymEntry(ast_node, LocationRef(type_, name))

    def _identifier(self):
        """Consume and return the current token, which must be an identifier."""
        self.scanner.check_type('identifier')
        name = self.scanner.token
        self.scanner.scan()
        return name

    def _integer_literal(self):
        """Consume the current integer-literal token and return it as an int."""
        self.scanner.check_type('integer literal')
        value = int(self.scanner.token)
        self.scanner.scan()
        return value

    def program(self):
        """Parse a whole program: definitions first, then routines.

        Every definition and routine is added to the symbol table as soon
        as it has been parsed.  Raises SyntaxError on a duplicate name or
        if anything other than end of input follows the last routine.
        """
        defns = []
        routines = []
        while self.scanner.on('byte') or self.scanner.on('vector'):
            defn = self.defn()
            self._declare(defn.name, defn, defn.type)
            defns.append(defn)
        while self.scanner.on('routine'):
            routine = self.routine()
            self._declare(routine.name, routine, TYPE_ROUTINE)
            routines.append(routine)
        self.scanner.check_type('EOF')
        return Program(defns=defns, routines=routines)

    def defn(self):
        """Parse `byte [table] NAME [@ ADDR]` or `vector NAME [@ ADDR]`.

        Returns a `Defn` whose `addr` is None when no `@ ADDR` is given.
        """
        if self.scanner.consume('byte'):
            type_ = TYPE_BYTE
            if self.scanner.consume('table'):
                type_ = TYPE_BYTE_TABLE
        else:
            self.scanner.expect('vector')
            type_ = TYPE_VECTOR
        name = self._identifier()
        addr = None
        if self.scanner.consume('@'):
            addr = self._integer_literal()
        return Defn(name=name, type=type_, addr=addr)

    def routine(self):
        """Parse one routine declaration.

        Form: `routine NAME [inputs L,..] [outputs L,..] [trashes L,..]`
        followed by either `@ ADDR` (no body; `block` is None) or a
        `{ ... }` block (`addr` is None).  Returns a `Routine`.
        """
        self.scanner.expect('routine')
        # Reject non-identifier names (e.g. `routine { ...`) up front.
        name = self._identifier()
        inputs = []
        outputs = []
        trashes = []
        if self.scanner.consume('inputs'):
            inputs = self.locexprs()
        if self.scanner.consume('outputs'):
            outputs = self.locexprs()
        if self.scanner.consume('trashes'):
            trashes = self.locexprs()
        if self.scanner.consume('@'):
            block = None
            addr = self._integer_literal()
        else:
            block = self.block()
            addr = None
        return Routine(
            name=name, inputs=inputs, outputs=outputs, trashes=trashes,
            block=block, addr=addr
        )

    def locexprs(self):
        """Parse one or more comma-separated location expressions."""
        accum = [self.locexpr()]
        while self.scanner.consume(','):
            accum.append(self.locexpr())
        return accum

    def locexpr(self):
        """Parse a single location expression.

        `on` / `off` yield a bit constant (1 / 0), an integer literal
        yields a byte constant, and anything else is looked up in the
        symbol table (SyntaxError if undeclared).
        """
        token = self.scanner.token
        if token in ('on', 'off'):
            loc = ConstantRef(TYPE_BIT, 1 if token == 'on' else 0)
        elif self.scanner.on_type('integer literal'):
            loc = ConstantRef(TYPE_BYTE, int(token))
        else:
            loc = self.lookup(token)
        self.scanner.scan()
        return loc

    def block(self):
        """Parse `{ instr* }` and return a `Block` of the instructions."""
        instrs = []
        self.scanner.expect('{')
        while not self.scanner.on('}'):
            instrs.append(self.instr())
        self.scanner.expect('}')
        return Block(instrs=instrs)

    def instr(self):
        """Parse a single instruction and return an `Instr`.

        Recognised forms:
          if [not] SRC { ... } [else { ... }]
          repeat { ... } until [not] SRC   |   repeat { ... } forever
          ld/add/sub/cmp/and/or/xor DEST, SRC [+ INDEX]
          st SRC, DEST [+ INDEX]
          shl/shr/inc/dec DEST
          call NAME
          copy SRC, DEST
          with interrupts off { ... }

        Raises ValueError if the current token starts none of these.
        """
        if self.scanner.consume('if'):
            inverted = False
            if self.scanner.consume('not'):
                inverted = True
            src = self.locexpr()
            block1 = self.block()
            block2 = None
            if self.scanner.consume('else'):
                block2 = self.block()
            return Instr(opcode='if', dest=None, src=src,
                         block1=block1, block2=block2, inverted=inverted)
        elif self.scanner.consume('repeat'):
            inverted = False
            src = None
            block = self.block()
            if self.scanner.consume('until'):
                if self.scanner.consume('not'):
                    inverted = True
                src = self.locexpr()
            else:
                # Without `until`, the loop must be marked `forever`.
                self.scanner.expect('forever')
            return Instr(opcode='repeat', dest=None, src=src,
                         block=block, inverted=inverted)
        elif self.scanner.token in ("ld", "add", "sub", "cmp", "and", "or", "xor"):
            opcode = self.scanner.token
            self.scanner.scan()
            dest = self.locexpr()
            self.scanner.expect(',')
            src = self.locexpr()
            index = None
            if self.scanner.consume('+'):
                index = self.locexpr()
            return Instr(opcode=opcode, dest=dest, src=src, index=index)
        elif self.scanner.token in ("st",):
            # Operand order is reversed relative to `ld`: source first.
            opcode = self.scanner.token
            self.scanner.scan()
            src = self.locexpr()
            self.scanner.expect(',')
            dest = self.locexpr()
            index = None
            if self.scanner.consume('+'):
                index = self.locexpr()
            return Instr(opcode=opcode, dest=dest, src=src, index=index)
        elif self.scanner.token in ("shl", "shr", "inc", "dec"):
            opcode = self.scanner.token
            self.scanner.scan()
            dest = self.locexpr()
            return Instr(opcode=opcode, dest=dest, src=None)
        elif self.scanner.token in ("call",):
            opcode = self.scanner.token
            self.scanner.scan()
            # The target is kept as a bare name rather than looked up:
            # routines are only added to the symbol table after they have
            # been parsed, so it may not be declared yet at this point.
            name = self._identifier()
            # TODO: check that it has been defined
            return Instr(opcode=opcode, name=name, dest=None, src=None)
        elif self.scanner.token in ("copy",):
            opcode = self.scanner.token
            self.scanner.scan()
            src = self.locexpr()
            self.scanner.expect(',')
            dest = self.locexpr()
            return Instr(opcode=opcode, dest=dest, src=src)
        elif self.scanner.consume("with"):
            self.scanner.expect("interrupts")
            self.scanner.expect("off")
            block = self.block()
            return Instr(opcode='with-sei', dest=None, src=None, block=block)
        else:
            raise ValueError('bad opcode "%s"' % self.scanner.token)