prog8/il65/plyparser.py
Irmen de Jong 9b68722df3 attr
2018-01-07 14:38:52 +01:00

784 lines
18 KiB
Python

"""
Programming Language for 6502/6510 microprocessors
This is the parser of the IL65 code, that generates a parse tree.
Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
import attr
from ply.yacc import yacc
from typing import Union
from .symbols import SourceRef
from .lexer import tokens, lexer, find_tok_column # get the lexer tokens. required.
start = "start"
@attr.s(cmp=False, slots=True, frozen=False)
class AstNode:
sourceref = attr.ib(type=SourceRef)
@property
def lineref(self) -> str:
return "src l. " + str(self.sourceref.line)
def print_tree(self) -> None:
def tostr(node: AstNode, level: int) -> None:
if not isinstance(node, AstNode):
return
indent = " " * level
name = getattr(node, "name", "")
print(indent, node.__class__.__name__, repr(name))
try:
variables = vars(node).items()
except TypeError:
return
for name, value in variables:
if isinstance(value, AstNode):
tostr(value, level + 1)
if isinstance(value, (list, tuple, set)):
if len(value) > 0:
elt = list(value)[0]
if isinstance(elt, AstNode) or name == "nodes":
print(indent, " >", name, "=")
for elt in value:
tostr(elt, level + 2)
tostr(self, 0)
@attr.s(cmp=False)
class Module(AstNode):
nodes = attr.ib(type=list)
@attr.s(cmp=False)
class Directive(AstNode):
name = attr.ib(type=str)
args = attr.ib(type=list, default=attr.Factory(list))
@attr.s(cmp=False)
class Scope(AstNode):
nodes = attr.ib(type=list)
@attr.s(cmp=False)
class Block(AstNode):
scope = attr.ib(type=Scope)
name = attr.ib(type=str, default=None)
address = attr.ib(type=int, default=None)
@attr.s(cmp=False)
class Label(AstNode):
name = attr.ib(type=str)
@attr.s(cmp=False)
class Register(AstNode):
name = attr.ib(type=str)
@attr.s(cmp=False)
class PreserveRegs(AstNode):
registers = attr.ib(type=str)
@attr.s(cmp=False)
class Assignment(AstNode):
left = attr.ib() # type: Union[str, TargetRegisters, Dereference]
right = attr.ib()
@attr.s(cmp=False)
class AugAssignment(Assignment):
operator = attr.ib(type=str)
@attr.s(cmp=False)
class SubCall(AstNode):
target = attr.ib()
preserve_regs = attr.ib()
arguments = attr.ib()
@attr.s(cmp=False)
class Return(AstNode):
value_A = attr.ib(default=None)
value_X = attr.ib(default=None)
value_Y = attr.ib(default=None)
@attr.s(cmp=False)
class TargetRegisters(AstNode):
registers = attr.ib(type=list)
def add(self, register: str) -> None:
self.registers.append(register)
@attr.s(cmp=False)
class InlineAssembly(AstNode):
assembly = attr.ib(type=str)
@attr.s(cmp=False)
class VarDef(AstNode):
name = attr.ib(type=str)
vartype = attr.ib()
datatype = attr.ib()
value = attr.ib(default=None)
@attr.s(cmp=False, slots=True)
class Datatype(AstNode):
name = attr.ib(type=str)
dimension = attr.ib(type=list, default=None)
@attr.s(cmp=False)
class Subroutine(AstNode):
name = attr.ib(type=str)
param_spec = attr.ib()
result_spec = attr.ib()
scope = attr.ib(type=Scope, default=None)
address = attr.ib(type=int, default=None)
def __attrs_post_init__(self):
if self.scope is not None and self.address is not None:
raise ValueError("subroutine must have either a scope or an address, not both")
@attr.s(cmp=False)
class Goto(AstNode):
target = attr.ib()
if_stmt = attr.ib(default=None)
condition = attr.ib(default=None)
@attr.s(cmp=False)
class Dereference(AstNode):
location = attr.ib()
datatype = attr.ib()
@attr.s(cmp=False, slots=True)
class CallTarget(AstNode):
target = attr.ib()
address_of = attr.ib(type=bool)
@attr.s(cmp=False, slots=True)
class CallArgument(AstNode):
value = attr.ib()
name = attr.ib(type=str, default=None)
@attr.s(cmp=False)
class UnaryOp(AstNode):
operator = attr.ib(type=str)
operand = attr.ib()
@attr.s(cmp=False, slots=True)
class Expression(AstNode):
left = attr.ib()
operator = attr.ib(type=str)
right = attr.ib()
def p_start(p):
"""
start : empty
| module_elements
"""
if p[1]:
p[0] = Module(nodes=p[1], sourceref=_token_sref(p, 1))
def p_module(p):
"""
module_elements : module_elt
| module_elements module_elt
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_module_elt(p):
"""
module_elt : ENDL
| directive
| block
"""
p[0] = p[1]
def p_directive(p):
"""
directive : DIRECTIVE ENDL
| DIRECTIVE directive_args ENDL
"""
if len(p) == 2:
p[0] = Directive(name=p[1], sourceref=_token_sref(p, 1))
else:
p[0] = Directive(name=p[1], args=p[2], sourceref=_token_sref(p, 1))
def p_directive_args(p):
"""
directive_args : directive_arg
| directive_args ',' directive_arg
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[3]]
def p_directive_arg(p):
"""
directive_arg : NAME
| INTEGER
| STRING
"""
p[0] = p[1]
def p_block_name_addr(p):
"""
block : BITINVERT NAME INTEGER endl_opt scope
"""
p[0] = Block(name=p[2], address=p[3], scope=p[5], sourceref=_token_sref(p, 1))
def p_block_name(p):
"""
block : BITINVERT NAME endl_opt scope
"""
p[0] = Block(name=p[2], scope=p[4], sourceref=_token_sref(p, 1))
def p_block(p):
"""
block : BITINVERT endl_opt scope
"""
p[0] = Block(scope=p[3], sourceref=_token_sref(p, 1))
def p_endl_opt(p):
"""
endl_opt : empty
| ENDL
"""
pass
def p_scope(p):
"""
scope : '{' scope_elements_opt '}'
"""
p[0] = Scope(nodes=p[2], sourceref=_token_sref(p, 1))
def p_scope_elements_opt(p):
"""
scope_elements_opt : empty
| scope_elements
"""
p[0] = p[1]
def p_scope_elements(p):
"""
scope_elements : scope_element
| scope_elements scope_element
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[2]]
def p_scope_element(p):
"""
scope_element : ENDL
| label
| directive
| vardef
| subroutine
| inlineasm
| statement
"""
p[0] = p[1]
def p_label(p):
"""
label : LABEL
"""
p[0] = Label(name=p[1], sourceref=_token_sref(p, 1))
def p_inlineasm(p):
"""
inlineasm : INLINEASM ENDL
"""
p[0] = InlineAssembly(assembly=p[1], sourceref=_token_sref(p, 1))
def p_vardef(p):
"""
vardef : VARTYPE type_opt NAME ENDL
"""
p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], sourceref=_token_sref(p, 1))
def p_vardef_value(p):
"""
vardef : VARTYPE type_opt NAME IS expression
"""
p[0] = VarDef(name=p[3], vartype=p[1], datatype=p[2], value=p[5], sourceref=_token_sref(p, 1))
def p_type_opt(p):
"""
type_opt : DATATYPE '(' dimensions ')'
| DATATYPE
| empty
"""
if len(p) == 5:
p[0] = Datatype(name=p[1], dimension=p[3], sourceref=_token_sref(p, 1))
elif len(p) == 2:
p[0] = Datatype(name=p[1], sourceref=_token_sref(p, 1))
def p_dimensions(p):
"""
dimensions : INTEGER
| dimensions ',' INTEGER
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[3]]
def p_literal_value(p):
"""literal_value : INTEGER
| FLOATINGPOINT
| STRING
| CHARACTER
| BOOLEAN"""
p[0] = p[1]
def p_subroutine(p):
"""
subroutine : SUB NAME '(' sub_param_spec ')' RARROW '(' sub_result_spec ')' subroutine_body ENDL
"""
body = p[10]
if isinstance(body, Scope):
p[0] = Subroutine(name=p[2], param_spec=p[4], result_spec=p[8], scope=body, sourceref=_token_sref(p, 1))
elif isinstance(body, int):
p[0] = Subroutine(name=p[2], param_spec=p[4], result_spec=p[8], address=body, sourceref=_token_sref(p, 1))
else:
raise TypeError("subroutine_body", p.slice)
def p_sub_param_spec(p):
"""
sub_param_spec : empty
| sub_param_list
"""
p[0] = p[1]
def p_sub_param_list(p):
"""
sub_param_list : sub_param
| sub_param_list ',' sub_param
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[3]]
def p_sub_param(p):
"""
sub_param : LABEL REGISTER
| REGISTER
"""
if len(p) == 3:
p[0] = (p[1], p[2])
elif len(p) == 2:
p[0] = (None, p[1])
def p_sub_result_spec(p):
"""
sub_result_spec : empty
| '?'
| sub_result_list
"""
if p[1] == '?':
p[0] = ['A', 'X', 'Y'] # '?' means: all registers clobbered
else:
p[0] = p[1]
def p_sub_result_list(p):
"""
sub_result_list : sub_result_reg
| sub_result_list ',' sub_result_reg
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[3]]
def p_sub_result_reg(p):
"""
sub_result_reg : REGISTER
| CLOBBEREDREGISTER
"""
p[0] = p[1]
def p_subroutine_body(p):
"""
subroutine_body : scope
| IS INTEGER
"""
if len(p) == 2:
p[0] = p[1]
else:
p[0] = p[2]
def p_statement(p):
"""
statement : assignment ENDL
| aug_assignment ENDL
| subroutine_call ENDL
| goto ENDL
| conditional_goto ENDL
| incrdecr ENDL
| return ENDL
"""
p[0] = p[1]
def p_incrdecr(p):
"""
incrdecr : assignment_target INCR
| assignment_target DECR
"""
p[0] = UnaryOp(operator=p[2], operand=p[1], sourceref=_token_sref(p, 1))
def p_call_subroutine(p):
"""
subroutine_call : calltarget preserveregs_opt '(' call_arguments_opt ')'
"""
p[0] = SubCall(target=p[1], preserve_regs=p[2], arguments=p[4], sourceref=_token_sref(p, 1))
def p_preserveregs_opt(p):
"""
preserveregs_opt : empty
| preserveregs
"""
p[0] = p[1]
def p_preserveregs(p):
"""
preserveregs : PRESERVEREGS
"""
p[0] = PreserveRegs(registers=p[1], sourceref=_token_sref(p, 1))
def p_call_arguments_opt(p):
"""
call_arguments_opt : empty
| call_arguments
"""
p[0] = p[1]
def p_call_arguments(p):
"""
call_arguments : call_argument
| call_arguments ',' call_argument
"""
if len(p) == 2:
p[0] = [p[1]]
else:
p[0] = p[1] + [p[3]]
def p_call_argument(p):
"""
call_argument : expression
| register IS expression
| NAME IS expression
"""
if len(p) == 2:
p[0] = CallArgument(value=p[1], sourceref=_token_sref(p, 1))
elif len(p) == 4:
p[0] = CallArgument(name=p[1], value=p[3], sourceref=_token_sref(p, 1))
def p_return(p):
"""
return : RETURN
| RETURN expression
| RETURN expression ',' expression
| RETURN expression ',' expression ',' expression
"""
if len(p) == 2:
p[0] = Return(sourceref=_token_sref(p, 1))
elif len(p) == 3:
p[0] = Return(value_A=p[2], sourceref=_token_sref(p, 1))
elif len(p) == 5:
p[0] = Return(value_A=p[2], value_X=p[4], sourceref=_token_sref(p, 1))
elif len(p) == 7:
p[0] = Return(value_A=p[2], value_X=p[4], value_Y=p[6], sourceref=_token_sref(p, 1))
def p_register(p):
"""
register : REGISTER
"""
p[0] = Register(name=p[1], sourceref=_token_sref(p, 1))
def p_goto(p):
"""
goto : GOTO calltarget
"""
p[0] = Goto(target=p[2], sourceref=_token_sref(p, 1))
def p_conditional_goto_plain(p):
"""
conditional_goto : IF GOTO calltarget
"""
p[0] = Goto(target=p[3], if_stmt=p[1], sourceref=_token_sref(p, 1))
def p_conditional_goto_expr(p):
"""
conditional_goto : IF expression GOTO calltarget
"""
p[0] = Goto(target=p[4], if_stmt=p[1], condition=p[2], sourceref=_token_sref(p, 1))
def p_calltarget(p):
"""
calltarget : symbolname
| INTEGER
| BITAND symbolname
| dereference
"""
if len(p) == 2:
p[0] = CallTarget(target=p[1], address_of=False, sourceref=_token_sref(p, 1))
elif len(p) == 3:
p[0] = CallTarget(target=p[2], address_of=True, sourceref=_token_sref(p, 1))
def p_dereference(p):
"""
dereference : '[' dereference_operand ']'
"""
p[0] = Dereference(location=p[2][0], datatype=p[2][1], sourceref=_token_sref(p, 1))
def p_dereference_operand(p):
"""
dereference_operand : symbolname type_opt
| REGISTER type_opt
| INTEGER type_opt
"""
p[0] = (p[1], p[2])
def p_symbolname(p):
"""
symbolname : NAME
| DOTTEDNAME
"""
p[0] = p[1]
def p_assignment(p):
"""
assignment : assignment_target IS expression
| assignment_target IS assignment
"""
p[0] = Assignment(left=p[1], right=p[3], sourceref=_token_sref(p, 1))
def p_aug_assignment(p):
"""
aug_assignment : assignment_target AUGASSIGN expression
"""
p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('right', 'UNARY_MINUS', 'BITINVERT', "UNARY_ADDRESSOF"),
('left', "LT", "GT", "LE", "GE", "EQUALS", "NOTEQUALS"),
('nonassoc', "COMMENT"),
)
def p_expression(p):
"""
expression : expression '+' expression
| expression '-' expression
| expression '*' expression
| expression '/' expression
| expression LT expression
| expression GT expression
| expression LE expression
| expression GE expression
| expression EQUALS expression
| expression NOTEQUALS expression
"""
p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
def p_expression_uminus(p):
"""
expression : '-' expression %prec UNARY_MINUS
"""
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
def p_expression_addressof(p):
"""
expression : BITAND symbolname %prec UNARY_ADDRESSOF
"""
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
def p_unary_expression_bitinvert(p):
"""
expression : BITINVERT expression
"""
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
def p_expression_group(p):
"""
expression : '(' expression ')'
"""
p[0] = p[2]
def p_expression_expr_value(p):
"""expression : expression_value"""
p[0] = p[1]
def p_expression_value(p):
"""
expression_value : literal_value
| symbolname
| register
| subroutine_call
| dereference
"""
p[0] = p[1]
def p_assignment_target(p):
"""
assignment_target : target_registers
| symbolname
| dereference
"""
p[0] = p[1]
def p_target_registers(p):
"""
target_registers : register
| target_registers ',' register
"""
if len(p) == 2:
p[0] = TargetRegisters(registers=[p[1]], sourceref=_token_sref(p, 1))
else:
p[1].add(p[3])
p[0] = p[1]
def p_empty(p):
"""empty :"""
pass
def p_error(p):
if p:
sref = SourceRef(p.lexer.source_filename, p.lineno, find_tok_column(p))
p.lexer.error_function("{}: before '{:.20s}' ({})", sref, str(p.value), repr(p))
else:
lexer.error_function("{}: at end of input", "@todo-filename3")
def _token_sref(p, token_idx):
""" Returns the coordinates for the YaccProduction object 'p' indexed
with 'token_idx'. The coordinate includes the 'lineno' and
'column'. Both follow the lex semantic, starting from 1.
"""
last_cr = p.lexer.lexdata.rfind('\n', 0, p.lexpos(token_idx))
if last_cr < 0:
last_cr = -1
column = (p.lexpos(token_idx) - last_cr)
return SourceRef(p.lexer.source_filename, p.lineno(token_idx), column)
class TokenFilter:
def __init__(self, lexer):
self.lexer = lexer
self.prev_was_EOL = False
assert "ENDL" in tokens
def token(self):
# make sure we only ever emit ONE "ENDL" token in sequence
if self.prev_was_EOL:
# skip all EOLS that might follow
while True:
tok = self.lexer.token()
if not tok or tok.type != "ENDL":
break
self.prev_was_EOL = False
else:
tok = self.lexer.token()
self.prev_was_EOL = tok and tok.type == "ENDL"
return tok
parser = yacc(write_tables=True)
if __name__ == "__main__":
import sys
file = sys.stdin # open(sys.argv[1], "rU")
lexer.source_filename = "derp"
tokenfilter = TokenFilter(lexer)
result = parser.parse(input=file.read(),
tokenfunc=tokenfilter.token) or Module(None, SourceRef(lexer.source_filename, 1, 1))
print("RESULT:")
result.print_tree()