This commit is contained in:
Irmen de Jong 2018-01-08 01:51:36 +01:00
parent 12c3ddd207
commit 7d8c2bf161
6 changed files with 302 additions and 57 deletions

View File

@ -1,9 +1,20 @@
"""
Programming Language for 6502/6510 microprocessors
This is the compiler of the IL65 code, that prepares the parse tree for code generation.
Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
import re
import os
import sys
import linecache
from typing import Optional, Generator, Tuple, Set
from .plyparser import parse_file, Module, Directive, Block, Subroutine, AstNode
from .plylexer import SourceRef
from typing import Optional, Tuple, Set, Dict, Any, List
from .plyparser import parse_file, Module, Directive, Block, Subroutine, Scope, \
SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, TargetRegisters
from .plylexer import SourceRef, print_bold
from .optimizer import optimize
class ParseError(Exception):
@ -26,41 +37,138 @@ class PlyParser:
module = parse_file(filename, self.lexer_error)
try:
self.check_directives(module)
self.remove_empty_blocks(module)
self.process_imports(module)
self.create_multiassigns(module)
if not self.parsing_import:
self.determine_subroutine_usage(module)
except ParseError as x:
self.handle_parse_error(x)
if self.parse_errors:
self.print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors))
print_bold("\nNo output; there were {:d} errors.\n".format(self.parse_errors))
raise SystemExit(1)
return module
def lexer_error(self, sourceref: SourceRef, fmtstring: str, *args: str) -> None:
# Error callback that is handed to parse_file(): counts the error in self.parse_errors
# and prints "ERROR: <sourceref>: <message>", where the message is fmtstring formatted with *args.
self.parse_errors += 1
# NOTE(review): the two print calls below look like the before/after lines of this diff
# (the method call replaced by the print_bold function imported from .plylexer) -- confirm that only one remains.
self.print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args)))
def remove_empty_blocks(self, module: Module) -> None:
    """Remove blocks and subroutines that cannot contribute anything to the output.

    A Block or Subroutine is dropped from its parent's node list when its scope
    holds nothing but directives, none of which is "asmbinary" or "asminclude".
    A Block is also dropped when it has neither a name nor an address.
    A warning is printed for every removal.  Nodes without a scope are left alone.
    """
    content_directives = {"asmbinary", "asminclude"}
    for scope, parent in self.recurse_scopes(module):
        if not isinstance(scope, (Subroutine, Block)):
            continue
        if not scope.scope:
            continue
        # Anything that is not a directive counts as content, and so do the
        # directives that pull in assembly.  n.name is only read for directives.
        has_content = any(not isinstance(n, Directive) or n.name in content_directives
                          for n in scope.scope.nodes)
        if not has_content:
            self.print_warning("ignoring empty block or subroutine", scope.sourceref)
            assert isinstance(parent, (Block, Module))
            parent.scope.nodes.remove(scope)
            # The node is gone already: falling through to the check below would
            # call remove() a second time and raise ValueError for a block that
            # is both empty and has no name and address.
            continue
        if isinstance(scope, Block) and not scope.name and scope.address is None:
            self.print_warning("ignoring block without name and address", scope.sourceref)
            assert isinstance(parent, Module)
            parent.scope.nodes.remove(scope)
def create_multiassigns(self, module: Module) -> None:
    """Flatten chained assignments (A=B=C=5) into one assignment with several targets.

    Every Assignment in every scope of the module is visited.  Targets that are
    a TargetRegisters holding exactly one register are replaced by that register.
    When the right-hand side is itself an Assignment, the targets of the whole
    chain are appended to the outermost assignment and its right-hand side
    becomes the innermost value.
    """
    def unwrap_single_registers(targets: List[Any]) -> List[Any]:
        # a TargetRegisters with a single register is reduced to that register
        return [t.registers[0] if isinstance(t, TargetRegisters) and len(t.registers) == 1 else t
                for t in targets]

    def flatten_chain(assign: Assignment) -> Assignment:
        # collapse the chain to the right first, then absorb its targets and value
        if not isinstance(assign.right, Assignment):
            return assign
        inner = flatten_chain(assign.right)
        assign.left.extend(unwrap_single_registers(inner.left))
        assign.right = inner.right
        return assign

    for mnode, _parent in module.all_scopes():
        if not mnode.scope:
            continue
        for stmt in mnode.scope.nodes:
            if not isinstance(stmt, Assignment):
                continue
            stmt.left = unwrap_single_registers(stmt.left)
            if isinstance(stmt.right, Assignment):
                flattened = flatten_chain(stmt)
                assert flattened is stmt and len(flattened.left) > 1 and not isinstance(flattened.right, Assignment)
def determine_subroutine_usage(self, module: Module) -> None:
    """Rebuild module.subroutine_usage from scratch.

    The usage table is cleared, after which every node in every scope of the
    module is handed to the scanner that belongs to its node type.  Node types
    without a scanner are ignored.
    """
    usage = module.subroutine_usage
    usage.clear()
    # checked in this order; the first matching type wins
    scanners = (
        (InlineAssembly, self._parse_asm_for_subroutine_usage),
        (SubCall, self._parse_subcall_for_subroutine_usages),
        (Goto, self._parse_goto_for_subroutine_usages),
        (Return, self._parse_return_for_subroutine_usages),
        (Assignment, self._parse_assignment_for_subroutine_usages),
    )
    for mnode, _parent in module.all_scopes():
        if not mnode.scope:
            continue
        for node in mnode.scope.nodes:
            for nodetype, scan in scanners:
                if isinstance(node, nodetype):
                    scan(usage, node, mnode.scope)
                    break
def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
subcall: SubCall, parent_scope: Scope) -> None:
# Record the subroutine that this call refers to, and scan the call arguments for further usages.
# usages maps (scopename, subroutinename) to the set of source locations (as strings) that use it.
# node.target (relevant if its a symbolname -- a str), node.arguments (list of CallArgument)
# CallArgument.value = expression.
if isinstance(subcall.target.target, str):
try:
# a dotted target "scope.name" names its scope explicitly
scopename, name = subcall.target.target.split('.')
except ValueError:
# unpacking failed: the target has no dot (or more than one), so it is taken as a name in the current scope
scopename = parent_scope.name
name = subcall.target.target
usages[(scopename, name)].add(str(subcall.sourceref))
for arg in subcall.arguments:
# argument expressions can contain subroutine calls of their own
self._parse_expression_for_subroutine_usages(usages, arg.value, parent_scope)
def _parse_expression_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                            expr: Any, parent_scope: Scope) -> None:
    """Walk an expression tree and record every subroutine call found in it.

    None, plain Python values (int, str, float, bool) and Register nodes hold no
    calls and are skipped.  A SubCall is passed to the subcall scanner, an
    Expression has both of its operands walked.  Any other kind of node is not
    handled yet and only reported on stdout.
    """
    if expr is None or isinstance(expr, (int, str, float, bool, Register)):
        return
    if isinstance(expr, SubCall):
        self._parse_subcall_for_subroutine_usages(usages, expr, parent_scope)
        return
    if isinstance(expr, Expression):
        for operand in (expr.left, expr.right):
            self._parse_expression_for_subroutine_usages(usages, operand, parent_scope)
        return
    print("@todo parse expression for subroutine usage:", expr)     # @todo
def _parse_goto_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                      goto: Goto, parent_scope: Scope) -> None:
    """Record the subroutine a goto jumps to, and scan the goto condition.

    goto.target.target is only relevant when it is a symbol name (a str); it is
    then looked up in parent_scope and recorded when it names a Subroutine.
    The condition expression is always scanned, also when the target is not a
    symbol name or cannot be found: previously an unknown target returned early,
    so subroutine calls inside the condition were never recorded.
    """
    target = goto.target.target
    if isinstance(target, str):
        try:
            symbol = parent_scope[target]
        except LookupError:
            # unknown symbol: nothing to record for the target itself
            symbol = None
        if isinstance(symbol, Subroutine):
            # NOTE(review): the usage is keyed on parent_scope.name; presumably wrong when the
            # lookup resolved a symbol that lives in another scope -- confirm against Scope.__getitem__
            usages[(parent_scope.name, symbol.name)].add(str(goto.sourceref))
    self._parse_expression_for_subroutine_usages(usages, goto.condition, parent_scope)
def _parse_return_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
returnnode: Return, parent_scope: Scope) -> None:
# node.value_A (expression), value_X (expression), value_Y (expression)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_A, parent_scope)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_X, parent_scope)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_Y, parent_scope)
def _parse_assignment_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
                                            assignment: Assignment, parent_scope: Scope) -> None:
    """Scan the value of an assignment for subroutine usages.

    The right-hand side may be another Assignment (a chained assignment); the
    chain is followed down to the innermost value, which is the only part that
    is an expression.
    """
    value = assignment.right
    while isinstance(value, Assignment):
        value = value.right
    self._parse_expression_for_subroutine_usages(usages, value, parent_scope)
def _parse_asm_for_subroutine_usage(self, usages: Dict[Tuple[str, str], Set[str]],
                                    asmnode: InlineAssembly, parent_scope: Scope) -> None:
    """Record the subroutines that an inline assembly block refers to.

    Every assembly line is split into its first word and the remainder; lines
    without a remainder are skipped.  Each symbol-like word in the remainder is
    looked up in parent_scope and recorded when it names a Subroutine.
    Words starting with '$' are hexadecimal literals and are ignored.
    """
    # The '$' is part of the pattern so that a hex literal is consumed as one word
    # instead of having its digits mistaken for a symbol.  The tail is optional
    # ('*'): with '+' a symbol of a single character was never matched, so a
    # subroutine with such a name was not counted as used.
    symbol_pattern = r"(?P<symbol>[a-zA-Z_$][a-zA-Z0-9_\.]*)"
    for line in asmnode.assembly.splitlines():
        splits = line.split(maxsplit=1)
        if len(splits) != 2:
            continue
        for match in re.finditer(symbol_pattern, splits[1]):
            name = match.group("symbol")
            if name[0] == '$':
                continue
            try:
                symbol = parent_scope[name]
            except LookupError:
                # not a known symbol (an opcode, a register, a local asm label, ...)
                continue
            if isinstance(symbol, Subroutine):
                # NOTE(review): keyed on parent_scope.name; presumably wrong when the lookup
                # resolved a symbol from another scope -- confirm against Scope.__getitem__
                usages[(parent_scope.name, symbol.name)].add(str(asmnode.sourceref))
def check_directives(self, module: Module) -> None:
for node, parent in self.recurse_scopes(module):
for node, parent in module.all_scopes():
if isinstance(node, Module):
# check module-level directives
imports = set() # type: Set[str]
@ -84,14 +192,6 @@ class PlyParser:
raise ParseError("saveregisters directive should be the first", None, sub_node.sourceref)
first_node = False
def recurse_scopes(self, module: Module) -> Generator[Tuple[AstNode, AstNode], None, None]:
    """Yield (node, parent_node) pairs for the module, its blocks and their subroutines.

    The traversal is preorder: first the module itself (with parent None), then
    each block followed directly by the subroutines it contains.  The node
    collections are copied before they are iterated.
    """
    yield module, None
    blocks = list(module.scope.filter_nodes(Block))
    for blk in blocks:
        yield blk, module
        subs = list(blk.scope.filter_nodes(Subroutine))
        for sub in subs:
            yield sub, blk
def process_imports(self, module: Module) -> None:
# (recursively) imports the modules
imported = []
@ -141,18 +241,6 @@ class PlyParser:
return filename
return None
def print_warning(self, text: str, sourceref: SourceRef=None) -> None:
if sourceref:
self.print_bold("warning: {}: {:s}".format(sourceref, text))
else:
self.print_bold("warning: " + text)
def print_bold(self, text: str) -> None:
if sys.stdout.isatty():
print("\x1b[1m" + text + "\x1b[0m", flush=True)
else:
print(text)
def handle_parse_error(self, exc: ParseError) -> None:
self.parse_errors += 1
if sys.stderr.isatty():
@ -176,6 +264,9 @@ class PlyParser:
if __name__ == "__main__":
description = "Compiler for IL65 language, code name 'Sick'"
print("\n" + description)
plyparser = PlyParser()
m = plyparser.parse_file(sys.argv[1])
print(str(m)[:400], "...")
optimize(m)
print()

View File

@ -131,7 +131,7 @@ class Optimizer:
for sub in list(block.symbols.iter_subroutines()):
usages = self.parsed.subroutine_usage[(sub.blockname, sub.name)]
if not usages and sub.blockname + '.' + sub.name not in never_remove:
block.symbols.discard_sub(sub.name)
block.symbols.remove_node(sub.name)
discarded.append(sub.name)
if discarded:
print("{}: discarded {:d} unused subroutines from block '{:s}'".format(block.sourceref, len(discarded), block.name))

99
il65/optimizer.py Normal file
View File

@ -0,0 +1,99 @@
"""
Programming Language for 6502/6510 microprocessors
This is the code to optimize the parse tree.
Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
from .plyparser import Module, Subroutine, Block, Directive, Assignment, AugAssignment
from .plylexer import print_warning, print_bold
class Optimizer:
    """Simplifies the parse tree of a module in place.

    Attributes:
        module: the Module whose scopes are rewritten.
        num_warnings: number of warnings issued so far by the current optimize() run.
    """

    def __init__(self, mod: Module) -> None:
        self.num_warnings = 0
        self.module = mod

    def optimize(self) -> None:
        """Run all optimization passes over the module, in a fixed order."""
        self.num_warnings = 0
        # self.remove_augmentedassign_incrdecr_nops(block)  # @todo
        self.remove_useless_assigns()
        # self.combine_assignments_into_multi(block)  # @todo
        self.optimize_multiassigns()
        self.remove_unused_subroutines()
        # self.optimize_compare_with_zero(block)  # @todo
        self.remove_empty_blocks()

    def remove_useless_assigns(self) -> None:
        """Remove assignment targets that equal the assigned value (A=A).

        An assignment that has no targets left afterwards is removed from its
        scope altogether, with a warning.
        """
        for mnode, parent in self.module.all_scopes():
            if not mnode.scope:
                continue
            # iterate over a copy: statements are removed from the scope while looping
            for assignment in list(mnode.scope.nodes):
                if not isinstance(assignment, Assignment):
                    continue
                assignment.left = [lv for lv in assignment.left if lv != assignment.right]
                if not assignment.left:
                    mnode.scope.remove_node(assignment)
                    self.num_warnings += 1
                    print_warning("{}: removed assignment statement that has no effect".format(assignment.sourceref))

    def optimize_multiassigns(self) -> None:
        """Remove duplicate targets from multi-assign statements, with a warning.

        The remaining targets keep the order of their first occurrence.  Going
        through a set would leave them in an arbitrary order, which for string
        targets can differ from one run to the next.
        """
        for mnode, parent in self.module.all_scopes():
            if not mnode.scope:
                continue
            for assignment in mnode.scope.nodes:
                if not isinstance(assignment, Assignment) or len(assignment.left) <= 1:
                    continue
                seen = set()
                unique = []
                for lvalue in assignment.left:
                    if lvalue not in seen:
                        seen.add(lvalue)
                        unique.append(lvalue)
                if len(unique) != len(assignment.left):
                    self.num_warnings += 1
                    print_warning("{}: removed duplicate assignment targets".format(assignment.sourceref))
                    assignment.left = unique
                # @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)

    def remove_unused_subroutines(self) -> None:
        """Remove subroutines without recorded usages and print how many were discarded.

        Usages are read from module.subroutine_usage, keyed on (blockname, subname).
        """
        # some symbols are used by the emitted assembly code from the code generator,
        # and should never be removed or the assembler will fail
        never_remove = {"c64.FREADUY", "c64.FTOMEMXY", "c64.FADD", "c64.FSUB",
                        "c64flt.GIVUAYF", "c64flt.copy_mflt", "c64flt.float_add_one", "c64flt.float_sub_one",
                        "c64flt.float_add_SW1_to_XY", "c64flt.float_sub_SW1_from_XY"}
        num_discarded = 0
        for sub, parent in self.module.all_scopes():
            if not isinstance(sub, Subroutine):
                continue
            usages = self.module.subroutine_usage[(parent.name, sub.name)]
            if not usages and parent.name + '.' + sub.name not in never_remove:
                parent.scope.remove_node(sub)
                num_discarded += 1
        print("discarded {:d} unused subroutines".format(num_discarded))

    def remove_empty_blocks(self) -> None:
        """Remove blocks and subroutines that cannot contribute anything to the output.

        A Block or Subroutine is dropped from its parent's node list when its
        scope holds nothing but directives, none of which is "asmbinary" or
        "asminclude".  A Block is also dropped when it has neither a name nor an
        address.  Every removal is counted and reported as a warning.  Nodes
        without a scope are left alone.
        """
        content_directives = {"asmbinary", "asminclude"}
        for node, parent in self.module.all_scopes():
            if not isinstance(node, (Subroutine, Block)):
                continue
            if not node.scope:
                continue
            # Anything that is not a directive counts as content, and so do the
            # directives that pull in assembly.  n.name is only read for directives.
            has_content = any(not isinstance(n, Directive) or n.name in content_directives
                              for n in node.scope.nodes)
            if not has_content:
                self.num_warnings += 1
                print_warning("ignoring empty block or subroutine", node.sourceref)
                assert isinstance(parent, (Block, Module))
                parent.scope.nodes.remove(node)
                # The node is gone already: falling through to the check below
                # would call remove() a second time and raise ValueError for a
                # block that is both empty and has no name and address.
                continue
            if isinstance(node, Block) and not node.name and node.address is None:
                self.num_warnings += 1
                print_warning("ignoring block without name and address", node.sourceref)
                assert isinstance(parent, Module)
                parent.scope.nodes.remove(node)
def optimize(mod: Module) -> None:
    """Optimize the given module in place and report how many warnings that produced."""
    opt = Optimizer(mod)
    opt.optimize()
    count = opt.num_warnings
    if not count:
        return
    if count == 1:
        summary = "there is one optimization warning."
    else:
        summary = "there are {:d} optimization warnings.".format(count)
    print_bold(summary)

View File

@ -147,11 +147,10 @@ reserved = {
# rules for tokens with some actions
def t_inlineasm(t):
# Token rule for the start of an inline assembly block ("%asm {"): switches the lexer to the 'inlineasm' state.
# NOTE(review): two pattern strings follow, apparently the old and the new line of this diff. PLY takes a
# rule's regex from the function docstring, i.e. only from the first string -- confirm that only the second remains.
# The second pattern stops at the end of the line: [^\S\n]* matches whitespace except newlines.
r"%asm\s*\{\s*"
r"%asm\s*\{[^\S\n]*"
t.lexer.code_start = t.lexer.lexpos # Record start position
t.lexer.level = 1 # initial brace level
t.lexer.begin("inlineasm") # enter state 'inlineasm'
# NOTE(review): presumably the line that this diff removes, now that the pattern no longer consumes newlines -- confirm
t.lexer.lineno += 1
def t_inlineasm_lbrace(t):
@ -320,6 +319,20 @@ def find_tok_column(token):
return token.lexpos - last_cr
def print_warning(text: str, sourceref: SourceRef = None) -> None:
    """Print a warning in bold, prefixed with the source location when one is given."""
    message = "warning: {}: {:s}".format(sourceref, text) if sourceref else "warning: " + text
    print_bold(message)
def print_bold(text: str) -> None:
    """Print text to stdout, wrapped in ANSI bold escape codes when stdout is a terminal."""
    if not sys.stdout.isatty():
        print(text)
        return
    print("\x1b[1m" + text + "\x1b[0m", flush=True)
lexer = ply.lex.lex()

View File

@ -6,9 +6,10 @@ Written by Irmen de Jong (irmen@razorvine.net)
License: GNU GPL 3.0, see LICENSE
"""
from collections import defaultdict
import attr
from ply.yacc import yacc
from typing import Union, Generator
from typing import Union, Generator, Tuple, List
from .plylexer import SourceRef, tokens, lexer, find_tok_column
@ -104,11 +105,26 @@ class Scope(AstNode):
if isinstance(node, nodetype):
yield node
def remove_node(self, node: AstNode) -> None:
if hasattr(node, "name"):
del self.symbols[node.name]
self.nodes.remove(node)
@attr.s(cmp=False, repr=False)
class Module(AstNode):
    """Root node of a parsed source file."""
    name = attr.ib(type=str)     # filename
    scope = attr.ib(type=Scope)
    subroutine_usage = attr.ib(type=defaultdict, init=False, default=attr.Factory(lambda: defaultdict(set)))    # will be populated later

    def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]:
        """Yield (node, parent_node) pairs for the module, its blocks and their subroutines.

        The traversal is preorder: first the module itself (with parent None),
        then each block followed directly by the subroutines it contains.
        It iterates over copies of the node collections, so it's okay to modify
        the scopes you iterate over.
        """
        yield self, None
        blocks = list(self.scope.filter_nodes(Block))
        for blk in blocks:
            yield blk, self
            subs = list(blk.scope.filter_nodes(Subroutine))
            for sub in subs:
                yield sub, blk
@attr.s(cmp=False, repr=False)
@ -130,6 +146,19 @@ class Label(AstNode):
class Register(AstNode):
    """A register, identified by its name; registers hash, compare and order by that name."""
    name = attr.ib(type=str)

    def __hash__(self) -> int:
        return hash(self.name)

    def __eq__(self, other) -> bool:
        # only registers compare by name; for anything else Python falls back to its default comparison
        if isinstance(other, Register):
            return self.name == other.name
        return NotImplemented

    def __lt__(self, other) -> bool:
        if isinstance(other, Register):
            return self.name < other.name
        return NotImplemented
@attr.s(cmp=False, repr=False)
class PreserveRegs(AstNode):
@ -138,13 +167,16 @@ class PreserveRegs(AstNode):
@attr.s(cmp=False, repr=False)
class Assignment(AstNode):
# Assignment statement: one or more targets (left) that receive a value (right).
# The parser creates it with a single-element target list; the right-hand side is an expression
# or another Assignment (a chained assignment such as A=B=5).
# NOTE(review): 'left' is declared twice below, apparently the old and the new line of this diff --
# confirm that only the list-typed declaration remains.
left = attr.ib() # type: Union[str, TargetRegisters, Dereference]
# can be single- or multi-assignment
left = attr.ib(type=list) # type: List[Union[str, TargetRegisters, Dereference]]
right = attr.ib()
@attr.s(cmp=False, repr=False)
# Augmented assignment statement: target (left), operator and value (right).
# Unlike Assignment, the parser passes a single target here, not a list.
# NOTE(review): two class headers follow, apparently the old and the new line of this diff --
# confirm that the class derives from AstNode and no longer from Assignment.
class AugAssignment(Assignment):
class AugAssignment(AstNode):
left = attr.ib()
operator = attr.ib(type=str)
right = attr.ib()
@attr.s(cmp=False, repr=False)
@ -153,6 +185,9 @@ class SubCall(AstNode):
preserve_regs = attr.ib()
arguments = attr.ib()
def __attrs_post_init__(self):
self.arguments = self.arguments or []
@attr.s(cmp=False, repr=False)
class Return(AstNode):
@ -689,14 +724,14 @@ def p_assignment(p):
assignment : assignment_target IS expression
| assignment_target IS assignment
"""
p[0] = Assignment(left=p[1], right=p[3], sourceref=_token_sref(p, 1))
p[0] = Assignment(left=[p[1]], right=p[3], sourceref=_token_sref(p, 2))
def p_aug_assignment(p):
"""
aug_assignment : assignment_target AUGASSIGN expression
"""
# PLY grammar action: the docstring above is the production, and p[1]..p[3] are its right-hand side symbols
# (target, operator token, value expression). The result in p[0] is the AugAssignment node.
# NOTE(review): two assignments to p[0] follow, apparently the old and the new line of this diff; they differ
# only in the symbol index passed to _token_sref for the source reference (1 versus 2) -- confirm that only the second remains.
p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
p[0] = AugAssignment(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
precedence = (
@ -721,7 +756,7 @@ def p_expression(p):
| expression EQUALS expression
| expression NOTEQUALS expression
"""
p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 1))
p[0] = Expression(left=p[1], operator=p[2], right=p[3], sourceref=_token_sref(p, 2))
def p_expression_uminus(p):

View File

@ -24,6 +24,12 @@ start:
c64.CHROUT('\n')
return
screen = border = cursor = X = Y = A = X = Y = A = border = cursor = border = cursor = 66 ; multi-assign!
X = Y = A = X = Y = A = X = Y = A = X = Y = AX = Y = A = X = AY = XY =A = 123 ; multi-assign!
XY = XY
A= A
A=X=Y=A
rndloop:
XY = math.randword()
@ -126,6 +132,7 @@ rndloop:
bne -
}
loop :
A=c64.GETIN()
if_not goto loop
@ -172,9 +179,9 @@ loop :
sub sub1 () -> () {
%breakpoint
%saveregisters off
%breakpoint
%breakpoint
label:
return