This commit is contained in:
Irmen de Jong 2018-01-08 01:51:36 +01:00
parent 12c3ddd207
commit 7d8c2bf161
6 changed files with 302 additions and 57 deletions

View File

@ -1,9 +1,20 @@
"""
Programming Language for 6502/6510 microprocessors
This is the compiler of the IL65 code, that prepares the parse tree for code generation.
Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
import re
import os import os
import sys import sys
import linecache import linecache
from typing import Optional, Generator, Tuple, Set from typing import Optional, Tuple, Set, Dict, Any, List
from .plyparser import parse_file, Module, Directive, Block, Subroutine, AstNode from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
from .plylexer import SourceRef SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters
from .plylexer import SourceRef, print_bold
from .optimizer import optimize
class ParseError(Exception): class ParseError(Exception):
@ -26,41 +37,138 @@ class PlyParser:
module = parse_file(filename, self.lexer_error) module = parse_file(filename, self.lexer_error)
try: try:
self.check_directives(module) self.check_directives(module)
self.remove_empty_blocks(module)
self.process_imports(module) self.process_imports(module)
self.create_multiassigns(module)
if not self.parsing_import:
self.determine_subroutine_usage(module)
except ParseError as x: except ParseError as x:
self.handle_parse_error(x) self.handle_parse_error(x)
if self.parse_errors: if self.parse_errors:
self.print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors)) print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors))
raise SystemExit(1) raise SystemExit(1)
return module return module
def lexer_error(self, sourceref: SourceRef, fmtstring: str, *args: str) -> None: def lexer_error(self, sourceref: SourceRef, fmtstring: str, *args: str) -> None:
self.parse_errors += 1 self.parse_errors += 1
self.print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args))) print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
def remove_empty_blocks(self, module: Module) -> None: def create_multiassigns(self, module: Module) -> None:
# remove blocks without name and without address, or that are empty # create multi-assign statements from nested assignments (A=B=C=5),
for scope, parent in self.recurse_scopes(module): # and optimize TargetRegisters down to single Register if it's just one register.
if isinstance(scope, (Subroutine, Block)): def simplify_targetregisters(targets: List[Any]) -> List[Any]:
if not scope.scope: new_targets = []
continue for t in targets:
if all(isinstance(n, Directive) for n in scope.scope.nodes): if isinstance(t, TargetRegisters) and len(t.registers) == 1:
empty = True t = t.registers[0]
for n in scope.scope.nodes: new_targets.append(t)
empty = empty and n.name not in {"asmbinary", "asminclude"} return new_targets
if empty:
self.print_warning("ignoring empty block or subroutine", scope.sourceref) def reduce_right(assign: Assignment) -> Assignment:
assert isinstance(parent, (Block, Module)) if isinstance(assign.right, Assignment):
parent.scope.nodes.remove(scope) right = reduce_right(assign.right)
if isinstance(scope, Block): targets = simplify_targetregisters(right.left)
if not scope.name and scope.address is None: assign.left.extend(targets)
self.print_warning("ignoring block without name and address", scope.sourceref) assign.right = right.right
assert isinstance(parent, Module) return assign
parent.scope.nodes.remove(scope)
for mnode, parent in module.all_scopes():
if mnode.scope:
for node in mnode.scope.nodes:
if isinstance(node, Assignment):
node.left = simplify_targetregisters(node.left)
if isinstance(node.right, Assignment):
multi = reduce_right(node)
assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment)
def determine_subroutine_usage(self, module: Module) -> None:
    """Rebuild ``module.subroutine_usage`` from the statements of every scope.

    The usage table is cleared first. Every node in every scope yielded by
    ``module.all_scopes()`` is then passed to the tracker matching its type;
    nodes of any other type are ignored.
    """
    usages = module.subroutine_usage
    usages.clear()
    # Tested in this order; the first matching node type wins.
    trackers = (
        (InlineAssembly, self._parse_asm_for_subroutine_usage),
        (SubCall, self._parse_subcall_for_subroutine_usages),
        (Goto, self._parse_goto_for_subroutine_usages),
        (Return, self._parse_return_for_subroutine_usages),
        (Assignment, self._parse_assignment_for_subroutine_usages),
    )
    for mnode, _parent in module.all_scopes():
        scope = mnode.scope
        if not scope:
            continue
        for node in scope.nodes:
            for nodetype, tracker in trackers:
                if isinstance(node, nodetype):
                    tracker(usages, node, scope)
                    break
def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                         subcall: SubCall, parent_scope: Scope) -> None:
    """Record the usage caused by a subroutine call and by its argument expressions.

    Only a call target given as a symbol name (a str) is recorded. A name of the
    form ``scope.name`` is filed under that scope; any other name is filed under
    the name of ``parent_scope``. The value of every call argument is scanned as
    an expression.
    """
    target = subcall.target.target
    if isinstance(target, str):
        parts = target.split('.')
        if len(parts) == 2:
            scopename, name = parts
        else:
            # no dot (or more than one): treat the whole text as a name in the current scope
            scopename, name = parent_scope.name, target
        usages[(scopename, name)].add(str(subcall.sourceref))
    for arg in subcall.arguments:
        self._parse_expression_for_subroutine_usages(usages, arg.value, parent_scope)
def _parse_expression_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                            expr: Any, parent_scope: Scope) -> None:
    """Scan an expression for subroutine calls and record them in ``usages``.

    ``None``, plain values (int, str, float, bool) and registers contain no
    calls. A ``SubCall`` is recorded via the subcall tracker, an ``Expression``
    is scanned on both its left and right side. Anything else is not handled
    yet and is only reported on stdout.
    """
    if expr is None or isinstance(expr, (int, str, float, bool, Register)):
        return
    if isinstance(expr, SubCall):
        self._parse_subcall_for_subroutine_usages(usages, expr, parent_scope)
        return
    if isinstance(expr, Expression):
        for operand in (expr.left, expr.right):
            self._parse_expression_for_subroutine_usages(usages, operand, parent_scope)
        return
    print("@todo parse expression for subroutine usage:", expr)    # @todo
def _parse_goto_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                      goto: Goto, parent_scope: Scope) -> None:
    """Record the usage caused by a goto statement and by its condition expression.

    The goto target only matters when it is a symbol name (a str) that
    ``parent_scope`` resolves to a ``Subroutine``; that usage is filed under
    the name of ``parent_scope``. The condition expression is always scanned,
    also when the target is not a name or cannot be resolved.
    """
    target = goto.target.target
    if isinstance(target, str):
        try:
            symbol = parent_scope[target]
        except LookupError:
            # Unresolvable target: nothing to record for it. Do not return here,
            # the condition below can still contain subroutine calls.
            pass
        else:
            if isinstance(symbol, Subroutine):
                # NOTE(review): the key uses the current scope's name even if the symbol
                # was resolved elsewhere (dotted name); confirm this is intended.
                usages[(parent_scope.name, symbol.name)].add(str(goto.sourceref))
    self._parse_expression_for_subroutine_usages(usages, goto.condition, parent_scope)
def _parse_return_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                        returnnode: Return, parent_scope: Scope) -> None:
    """Scan the three return value expressions (value_A, value_X, value_Y), in that order."""
    for attribute in ("value_A", "value_X", "value_Y"):
        value = getattr(returnnode, attribute)
        self._parse_expression_for_subroutine_usages(usages, value, parent_scope)
def _parse_assignment_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                            assignment: Assignment, parent_scope: Scope) -> None:
    """Scan the value of a (possibly nested) assignment for subroutine calls.

    The right-hand side is either an expression or another Assignment; nested
    assignments are followed until the innermost right-hand side, which is the
    only part that gets scanned.
    """
    innermost = assignment
    while isinstance(innermost.right, Assignment):
        innermost = innermost.right
    self._parse_expression_for_subroutine_usages(usages, innermost.right, parent_scope)
def _parse_asm_for_subroutine_usage(self, usages: Dict[Tuple[str, str], Set[str]],
                                    asmnode: InlineAssembly, parent_scope: Scope) -> None:
    """Record the subroutines that an inline assembly block refers to.

    Every assembly line is split on whitespace into a first word and the rest.
    Symbol-like words in the rest are looked up in ``parent_scope``; each one
    that resolves to a ``Subroutine`` is recorded under the name of
    ``parent_scope``. Words starting with ``$`` are skipped, and so are names
    the scope does not know.
    """
    symbol_pattern = re.compile(r"(?P<symbol>[a-zA-Z_$][a-zA-Z0-9_\.]+)")
    for line in asmnode.assembly.splitlines():
        fields = line.split(maxsplit=1)
        if len(fields) != 2:
            continue    # only one word (or nothing) on this line
        for match in symbol_pattern.finditer(fields[1]):
            name = match.group("symbol")
            if name.startswith('$'):
                continue
            try:
                symbol = parent_scope[name]
            except LookupError:
                continue
            if isinstance(symbol, Subroutine):
                usages[(parent_scope.name, symbol.name)].add(str(asmnode.sourceref))
def check_directives(self, module: Module) -> None: def check_directives(self, module: Module) -> None:
for node, parent in self.recurse_scopes(module): for node, parent in module.all_scopes():
if isinstance(node, Module): if isinstance(node, Module):
# check module-level directives # check module-level directives
imports = set() # type: Set[str] imports = set() # type: Set[str]
@ -84,14 +192,6 @@ class PlyParser:
raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref) raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref)
first_node = False first_node = False
def recurse_scopes(self, module: Module) -> Generator[Tuple[AstNode, AstNode], None, None]:
# generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
yield module, None
for block in list(module.scope.filter_nodes(Block)):
yield block, module
for subroutine in list(block.scope.filter_nodes(Subroutine)):
yield subroutine, block
def process_imports(self, module: Module) -> None: def process_imports(self, module: Module) -> None:
# (recursively) imports the modules # (recursively) imports the modules
imported = [] imported = []
@ -141,18 +241,6 @@ class PlyParser:
return filename return filename
return None return None
def print_warning(self, text: str, sourceref: SourceRef=None) -> None:
if sourceref:
self.print_bold("warning: {}: {:s}".format(sourceref, text))
else:
self.print_bold("warning: " + text)
def print_bold(self, text: str) -> None:
if sys.stdout.isatty():
print("\x1b[1m" + text + "\x1b[0m", flush=True)
else:
print(text)
def handle_parse_error(self, exc: ParseError) -> None: def handle_parse_error(self, exc: ParseError) -> None:
self.parse_errors += 1 self.parse_errors += 1
if sys.stderr.isatty(): if sys.stderr.isatty():
@ -176,6 +264,9 @@ class PlyParser:
if __name__ == "__main__": if __name__ == "__main__":
description = "Compiler for IL65 language, code name 'Sick'"
print("\n" + description)
plyparser = PlyParser() plyparser = PlyParser()
m = plyparser.parse_file(sys.argv[1]) m = plyparser.parse_file(sys.argv[1])
print(str(m)[:400], "...") optimize(m)
print()

View File

@ -131,7 +131,7 @@ class Optimizer:
for sub in list(block.symbols.iter_subroutines()): for sub in list(block.symbols.iter_subroutines()):
usages = self.parsed.subroutine_usage[(sub.blockname, sub.name)] usages = self.parsed.subroutine_usage[(sub.blockname, sub.name)]
if not usages and sub.blockname + '.' + sub.name not in never_remove: if not usages and sub.blockname + '.' + sub.name not in never_remove:
block.symbols.discard_sub(sub.name) block.symbols.remove_node(sub.name)
discarded.append(sub.name) discarded.append(sub.name)
if discarded: if discarded:
print("{}: discarded {:d} unused subroutines from block '{:s}'".format(block.sourceref, len(discarded), block.name)) print("{}: discarded {:d} unused subroutines from block '{:s}'".format(block.sourceref, len(discarded), block.name))

99
il65/optimizer.py Normal file
View File

@ -0,0 +1,99 @@
"""
Programming Language for 6502/6510 microprocessors
This is the code to optimize the parse tree.
Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment
from .plylexer import print_warning, print_bold
class Optimizer:
    """Runs a fixed series of clean-up passes over a parsed module, modifying it in place.

    ``num_warnings`` counts the warnings printed by the passes of the last
    ``optimize()`` run.
    """

    def __init__(self, mod: Module) -> None:
        self.num_warnings = 0
        self.module = mod

    def optimize(self) -> None:
        """Reset the warning counter and run all enabled passes in order."""
        self.num_warnings = 0
        # self.remove_augmentedassign_incrdecr_nops(block)     # @todo
        self.remove_useless_assigns()
        # self.combine_assignments_into_multi(block)   # @todo
        self.optimize_multiassigns()
        self.remove_unused_subroutines()
        # self.optimize_compare_with_zero(block)   # @todo
        self.remove_empty_blocks()

    def remove_useless_assigns(self) -> None:
        """Drop assignment targets equal to the assigned value (A=A).

        An assignment that has no targets left is removed from its scope, with a warning.
        """
        for mnode, parent in self.module.all_scopes():
            if not mnode.scope:
                continue
            # iterate over a copy: statements may be removed from the scope while looping
            for assignment in list(mnode.scope.nodes):
                if not isinstance(assignment, Assignment):
                    continue
                assignment.left = [lv for lv in assignment.left if lv != assignment.right]
                if not assignment.left:
                    mnode.scope.remove_node(assignment)
                    self.num_warnings += 1
                    print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref))

    def optimize_multiassigns(self) -> None:
        """Remove duplicate targets from multi-assign statements, with a warning.

        The remaining targets keep the order of their first occurrence, so the
        result is the same on every run (a plain set would yield them in
        arbitrary order).
        """
        for mnode, parent in self.module.all_scopes():
            if not mnode.scope:
                continue
            for assignment in mnode.scope.nodes:
                if not (isinstance(assignment, Assignment) and len(assignment.left) > 1):
                    continue
                # remove duplicates, keeping the first occurrence of every target
                seen = set()
                lvalues = []
                for lv in assignment.left:
                    if lv not in seen:
                        seen.add(lv)
                        lvalues.append(lv)
                if len(lvalues) != len(assignment.left):
                    self.num_warnings += 1
                    print_warning("{}: removed duplicate assignment targets".format(assignment.sourceref))
                # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)
                assignment.left = lvalues

    def remove_unused_subroutines(self) -> None:
        """Remove subroutines without recorded usages, then print how many were discarded.

        Usages are read from ``self.module.subroutine_usage``, keyed by
        (parent name, subroutine name).
        """
        # some symbols are used by the emitted assembly code from the code generator,
        # and should never be removed or the assembler will fail
        never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB",
                        "c64flt.GIVUAYF", "c64flt.copy_mflt", "c64flt.float_add_one", "c64flt.float_sub_one",
                        "c64flt.float_add_SW1_to_XY", "c64flt.float_sub_SW1_from_XY"}
        num_discarded = 0
        for sub, parent in self.module.all_scopes():
            if not isinstance(sub, Subroutine):
                continue
            usages = self.module.subroutine_usage[(parent.name, sub.name)]
            if not usages and parent.name + '.' + sub.name not in never_remove:
                parent.scope.remove_node(sub)
                num_discarded += 1
        print("discarded {:d} unused subroutines".format(num_discarded))

    def remove_empty_blocks(self) -> None:
        """Remove blocks without name and without address, or blocks/subroutines that are empty.

        A block or subroutine counts as empty when its scope holds only
        directives, none of which is ``asmbinary`` or ``asminclude``. Nodes
        that have no scope are left alone. Each removal prints a warning.
        """
        for node, parent in self.module.all_scopes():
            if not isinstance(node, (Subroutine, Block)) or not node.scope:
                continue
            empty = all(isinstance(n, Directive) and n.name not in {"asmbinary", "asminclude"}
                        for n in node.scope.nodes)
            if empty:
                self.num_warnings += 1
                print_warning("ignoring empty block or subroutine", node.sourceref)
                assert isinstance(parent, (Block, Module))
                # NOTE(review): removes from the node list only; the parent's symbols are not updated here.
                parent.scope.nodes.remove(node)
                # already removed: a second removal below would raise ValueError
                continue
            if isinstance(node, Block) and not node.name and node.address is None:
                self.num_warnings += 1
                print_warning("ignoring block without name and address", node.sourceref)
                assert isinstance(parent, Module)
                parent.scope.nodes.remove(node)
def optimize(mod: Module) -> None:
    """Optimize the given module in place and report how many warnings that produced.

    Nothing is reported when the optimizer issued no warnings.
    """
    optimizer = Optimizer(mod)
    optimizer.optimize()
    count = optimizer.num_warnings
    if not count:
        return
    if count == 1:
        summary = "there is one optimization warning."
    else:
        summary = "there are {:d} optimization warnings.".format(count)
    print_bold(summary)

View File

@ -147,11 +147,10 @@ reserved = {
# rules for tokens with some actions # rules for tokens with some actions
def t_inlineasm(t): def t_inlineasm(t):
r"%asm\s*\{\s*" r"%asm\s*\{[^\S\n]*"
t.lexer.code_start = t.lexer.lexpos # Record start position t.lexer.code_start = t.lexer.lexpos # Record start position
t.lexer.level = 1 # initial brace level t.lexer.level = 1 # initial brace level
t.lexer.begin("inlineasm") # enter state 'inlineasm' t.lexer.begin("inlineasm") # enter state 'inlineasm'
t.lexer.lineno += 1
def t_inlineasm_lbrace(t): def t_inlineasm_lbrace(t):
@ -320,6 +319,20 @@ def find_tok_column(token):
return token.lexpos - last_cr return token.lexpos - last_cr
def print_warning(text: str, sourceref: SourceRef = None) -> None:
    """Print a warning in bold, prefixed with the source reference when one is given."""
    if not sourceref:
        print_bold("warning: " + text)
        return
    print_bold("warning: {}: {:s}".format(sourceref, text))
def print_bold(text: str) -> None:
    """Print text to stdout, wrapped in the ANSI bold escape codes when stdout is a terminal.

    The terminal output is flushed right away; otherwise the text is printed as is.
    """
    if not sys.stdout.isatty():
        print(text)
        return
    print("\x1b[1m" + text + "\x1b[0m", flush=True)
lexer = ply.lex.lex() lexer = ply.lex.lex()

View File

@ -6,9 +6,10 @@ Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE License: GNU GPL 3.0, see LICENSE
""" """
from collections import defaultdict
import attr import attr
from ply.yacc import yacc from ply.yacc import yacc
from typing import Union, Generator from typing import Union, Generator, Tuple, List
from .plylexer import SourceRef, tokens, lexer, find_tok_column from .plylexer import SourceRef, tokens, lexer, find_tok_column
@ -104,11 +105,26 @@ class Scope(AstNode):
if isinstance(node, nodetype): if isinstance(node, nodetype):
yield node yield node
def remove_node(self, node: AstNode) -> None:
    """Remove a node from this scope's node list.

    When the node has a ``name`` attribute, the entry with that name is first
    deleted from ``self.symbols``.
    """
    try:
        symbol_name = node.name
    except AttributeError:
        pass    # unnamed node: there is no symbol entry to delete
    else:
        del self.symbols[symbol_name]
    self.nodes.remove(node)
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
class Module(AstNode): class Module(AstNode):
name = attr.ib(type=str) # filename name = attr.ib(type=str) # filename
scope = attr.ib(type=Scope) scope = attr.ib(type=Scope)
subroutine_usage = attr.ib(type=defaultdict, init=False, default=attr.Factory(lambda: defaultdict(set))) # will be populated later
def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]:
    """Yield (node, parent_node) tuples for all scopes, in preorder.

    First this module itself (with parent None), then each Block in the module
    scope, each followed by the Subroutines in that block's scope. The node
    collections are copied before they are iterated, so it's okay to modify
    the scopes you iterate over.
    """
    yield self, None
    blocks = list(self.scope.filter_nodes(Block))
    for blk in blocks:
        yield blk, self
        subroutines = list(blk.scope.filter_nodes(Subroutine))
        yield from ((sub, blk) for sub in subroutines)
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
@ -130,6 +146,19 @@ class Label(AstNode):
class Register(AstNode): class Register(AstNode):
name = attr.ib(type=str) name = attr.ib(type=str)
def __hash__(self) -> int:
    """Hash a register by its name."""
    register_name = self.name
    return hash(register_name)
def __eq__(self, other) -> bool:
    """Registers are equal when their names are equal; other types are left to Python's fallback."""
    if isinstance(other, Register):
        return self.name == other.name
    return NotImplemented
def __lt__(self, other) -> bool:
    """Order registers by name; other types are left to Python's fallback."""
    if isinstance(other, Register):
        return self.name < other.name
    return NotImplemented
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
class PreserveRegs(AstNode): class PreserveRegs(AstNode):
@ -138,13 +167,16 @@ class PreserveRegs(AstNode):
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
class Assignment(AstNode): class Assignment(AstNode):
left = attr.ib() # type: Union[str, TargetRegisters, Dereference] # can be single- or multi-assignment
left = attr.ib(type=list) # type: List[Union[str, TargetRegisters, Dereference]]
right = attr.ib() right = attr.ib()
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
class AugAssignment(Assignment): class AugAssignment(AstNode):
left = attr.ib()
operator = attr.ib(type=str) operator = attr.ib(type=str)
right = attr.ib()
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
@ -153,6 +185,9 @@ class SubCall(AstNode):
preserve_regs = attr.ib() preserve_regs = attr.ib()
arguments = attr.ib() arguments = attr.ib()
def __attrs_post_init__(self):
    """Make sure ``arguments`` is a list: a missing or empty value becomes a new empty list."""
    if not self.arguments:
        self.arguments = []
@attr.s(cmp=False, repr=False) @attr.s(cmp=False, repr=False)
class Return(AstNode): class Return(AstNode):
@ -689,14 +724,14 @@ def p_assignment(p):
assignment : assignment_target IS expression assignment : assignment_target IS expression
| assignment_target IS assignment | assignment_target IS assignment
""" """
p[0] = Assignment(left=p[1], right=p[3], sourceref=_token_sref(p, 1)) p[0] = Assignment(left=[p[1]], right=p[3], sourceref=_token_sref(p, 2))
def p_aug_assignment(p): def p_aug_assignment(p):
""" """
aug_assignment : assignment_target AUGASSIGN expression aug_assignment : assignment_target AUGASSIGN expression
""" """
p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1)) p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
precedence = ( precedence = (
@ -721,7 +756,7 @@ def p_expression(p):
| expression EQUALS expression | expression EQUALS expression
| expression NOTEQUALS expression | expression NOTEQUALS expression
""" """
p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1)) p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
def p_expression_uminus(p): def p_expression_uminus(p):

View File

@ -24,6 +24,12 @@ start:
c64.CHROUT('\n') c64.CHROUT('\n')
return return
screen = border = cursor = X = Y = A = X = Y = A = border = cursor = border = cursor = 66 ; multi-assign!
X = Y = A = X = Y = A = X = Y = A = X = Y = AX = Y = A = X = AY = XY =A = 123 ; multi-assign!
XY = XY
A= A
A=X=Y=A
rndloop: rndloop:
XY = math.randword() XY = math.randword()
@ -126,6 +132,7 @@ rndloop:
bne - bne -
} }
loop : loop :
A=c64.GETIN() A=c64.GETIN()
if_not goto loop if_not goto loop
@ -172,9 +179,9 @@ loop :
sub sub1 () -> () { sub sub1 () -> () {
%breakpoint
%saveregisters off %saveregisters off
%breakpoint %breakpoint
%breakpoint
label: label:
return return