expression

This commit is contained in:
Irmen de Jong 2018-01-10 00:44:11 +01:00
parent 0bb5f98768
commit 29060f3373
11 changed files with 466 additions and 152 deletions

View File

@ -1,7 +1,7 @@
IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
=====================================================================================
*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*
*Written by Irmen de Jong (irmen@razorvine.net)*
*Software license: GNU GPL 3.0, see file LICENSE*
@ -17,6 +17,7 @@ which aims to provide many conveniences over raw assembly code (even when using
- subroutines have enforced input- and output parameter definitions
- various data types other than just bytes (16-bit words, floats, strings, 16-bit register pairs)
- automatic variable allocations, automatic string variables and string sharing
- constant folding in expressions (compile-time evaluation)
- automatic type conversions
- floating point operations
- optional automatic preservation and restoration of CPU register state, when calling routines that would otherwise clobber the registers
@ -24,6 +25,7 @@ which aims to provide many conveniences over raw assembly code (even when using
- breakpoints, that let the Vice emulator drop into the monitor if execution hits them
- source code labels automatically loaded in Vice emulator so it can show them in disassembly
- conditional gotos
- some code optimizations (such as not repeatedly loading the same value in a register)
- @todo: loops
- @todo: memory block operations

View File

@ -11,8 +11,9 @@ import sys
import linecache
from typing import Optional, Tuple, Set, Dict, Any, no_type_check
import attr
from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, \
SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions
from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, LiteralValue, \
SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions,\
SymbolName, process_constant_expression, process_dynamic_expression
from .plylex import SourceRef, print_bold
from .optimize import optimize
@ -72,11 +73,8 @@ class PlyParser:
def process_all_expressions(self, module: Module) -> None:
# process/simplify all expressions (constant folding etc)
for block, parent in module.all_scopes():
if block.scope:
for node in block.scope.nodes:
if node is None:
print(block, block.scope, block.scope.nodes)
node.process_expressions()
for node in block.nodes:
node.process_expressions(block.scope)
@no_type_check
def create_multiassigns(self, module: Module) -> None:
@ -90,8 +88,7 @@ class PlyParser:
return assign
for block, parent in module.all_scopes():
if block.scope:
for node in block.scope.nodes:
for node in block.nodes:
if isinstance(node, Assignment):
if isinstance(node.right, Assignment):
multi = reduce_right(node)
@ -103,11 +100,11 @@ class PlyParser:
if not scope:
return
if len(save_dir.args) > 1:
raise ParseError("need zero or one directive argument", save_dir.sourceref)
raise ParseError("expected zero or one directive argument", save_dir.sourceref)
if save_dir.args:
if save_dir.args[0] in ("yes", "true"):
if save_dir.args[0] in ("yes", "true", True):
scope.save_registers = True
elif save_dir.args[0] in ("no", "false"):
elif save_dir.args[0] in ("no", "false", False):
scope.save_registers = False
else:
raise ParseError("invalid directive args", save_dir.sourceref)
@ -120,7 +117,7 @@ class PlyParser:
for directive in block.scope.filter_nodes(Directive):
if directive.name == "output":
if len(directive.args) != 1 or not isinstance(directive.args[0], str):
raise ParseError("need one str directive argument", directive.sourceref)
raise ParseError("expected one str directive argument", directive.sourceref)
if directive.args[0] == "raw":
block.format = ProgramFormat.RAW
block.address = 0xc000
@ -134,7 +131,7 @@ class PlyParser:
raise ParseError("invalid directive args", directive.sourceref)
elif directive.name == "address":
if len(directive.args) != 1 or not isinstance(directive.args[0], int):
raise ParseError("need one integer directive argument", directive.sourceref)
raise ParseError("expected one integer directive argument", directive.sourceref)
if block.format == ProgramFormat.BASIC:
raise ParseError("basic cannot have a custom load address", directive.sourceref)
block.address = directive.args[0]
@ -178,20 +175,19 @@ class PlyParser:
def determine_subroutine_usage(self, module: Module) -> None:
module.subroutine_usage.clear()
for block, parent in module.all_scopes():
if block.scope:
for node in block.scope.nodes:
for node in block.nodes:
if isinstance(node, InlineAssembly):
self._parse_asm_for_subroutine_usage(module.subroutine_usage, node, block.scope)
self._get_subroutine_usages_from_asm(module.subroutine_usage, node, block.scope)
elif isinstance(node, SubCall):
self._parse_subcall_for_subroutine_usages(module.subroutine_usage, node, block.scope)
self._get_subroutine_usages_from_subcall(module.subroutine_usage, node, block.scope)
elif isinstance(node, Goto):
self._parse_goto_for_subroutine_usages(module.subroutine_usage, node, block.scope)
self._get_subroutine_usages_from_goto(module.subroutine_usage, node, block.scope)
elif isinstance(node, Return):
self._parse_return_for_subroutine_usages(module.subroutine_usage, node, block.scope)
self._get_subroutine_usages_from_return(module.subroutine_usage, node, block.scope)
elif isinstance(node, Assignment):
self._parse_assignment_for_subroutine_usages(module.subroutine_usage, node, block.scope)
self._get_subroutine_usages_from_assignment(module.subroutine_usage, node, block.scope)
def _parse_subcall_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_subcall(self, usages: Dict[Tuple[str, str], Set[str]],
subcall: SubCall, parent_scope: Scope) -> None:
# node.target (relevant if its a symbolname -- a str), node.arguments (list of CallArgument)
# CallArgument.value = expression.
@ -203,21 +199,30 @@ class PlyParser:
name = subcall.target.target
usages[(scopename, name)].add(str(subcall.sourceref))
for arg in subcall.arguments:
self._parse_expression_for_subroutine_usages(usages, arg.value, parent_scope)
self._get_subroutine_usages_from_expression(usages, arg.value, parent_scope)
def _parse_expression_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_expression(self, usages: Dict[Tuple[str, str], Set[str]],
expr: Any, parent_scope: Scope) -> None:
if expr is None or isinstance(expr, (int, str, float, bool, Register)):
return
elif isinstance(expr, SubCall):
self._parse_subcall_for_subroutine_usages(usages, expr, parent_scope)
self._get_subroutine_usages_from_subcall(usages, expr, parent_scope)
elif isinstance(expr, Expression):
self._parse_expression_for_subroutine_usages(usages, expr.left, parent_scope)
self._parse_expression_for_subroutine_usages(usages, expr.right, parent_scope)
self._get_subroutine_usages_from_expression(usages, expr.left, parent_scope)
self._get_subroutine_usages_from_expression(usages, expr.right, parent_scope)
elif isinstance(expr, LiteralValue):
return
elif isinstance(expr, SymbolName):
try:
symbol = parent_scope[expr.name]
if isinstance(symbol, Subroutine):
usages[(parent_scope.name, expr.name)].add(str(expr.sourceref))
except LookupError:
pass
else:
print("@todo parse expression for subroutine usage:", expr) # @todo
raise TypeError("unknown expr type to scan for sub usages", expr, expr.sourceref)
def _parse_goto_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_goto(self, usages: Dict[Tuple[str, str], Set[str]],
goto: Goto, parent_scope: Scope) -> None:
# node.target (relevant if its a symbolname -- a str), node.condition (expression)
if isinstance(goto.target.target, str):
@ -227,24 +232,24 @@ class PlyParser:
return
if isinstance(symbol, Subroutine):
usages[(parent_scope.name, symbol.name)].add(str(goto.sourceref))
self._parse_expression_for_subroutine_usages(usages, goto.condition, parent_scope)
self._get_subroutine_usages_from_expression(usages, goto.condition, parent_scope)
def _parse_return_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_return(self, usages: Dict[Tuple[str, str], Set[str]],
returnnode: Return, parent_scope: Scope) -> None:
# node.value_A (expression), value_X (expression), value_Y (expression)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_A, parent_scope)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_X, parent_scope)
self._parse_expression_for_subroutine_usages(usages, returnnode.value_Y, parent_scope)
self._get_subroutine_usages_from_expression(usages, returnnode.value_A, parent_scope)
self._get_subroutine_usages_from_expression(usages, returnnode.value_X, parent_scope)
self._get_subroutine_usages_from_expression(usages, returnnode.value_Y, parent_scope)
def _parse_assignment_for_subroutine_usages(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_assignment(self, usages: Dict[Tuple[str, str], Set[str]],
assignment: Assignment, parent_scope: Scope) -> None:
# node.right (expression, or another Assignment)
if isinstance(assignment.right, Assignment):
self._parse_assignment_for_subroutine_usages(usages, assignment.right, parent_scope)
self._get_subroutine_usages_from_assignment(usages, assignment.right, parent_scope)
else:
self._parse_expression_for_subroutine_usages(usages, assignment.right, parent_scope)
self._get_subroutine_usages_from_expression(usages, assignment.right, parent_scope)
def _parse_asm_for_subroutine_usage(self, usages: Dict[Tuple[str, str], Set[str]],
def _get_subroutine_usages_from_asm(self, usages: Dict[Tuple[str, str], Set[str]],
asmnode: InlineAssembly, parent_scope: Scope) -> None:
# asm can refer to other symbols as well, track subroutine usage
for line in asmnode.assembly.splitlines():

View File

@ -104,13 +104,13 @@ def coerce_value(datatype: DataType, value: PrimitiveType, sourceref: SourceRef=
# if we're a BYTE type, and the value is a single character, convert it to the numeric value
def verify_bounds(value: PrimitiveType) -> None:
# if the value is out of bounds, raise an overflow exception
if isinstance(value, (int, float)):
if datatype == DataType.BYTE and not (0 <= value <= 0xff): # type: ignore
raise OverflowError("value out of range for byte")
if datatype == DataType.WORD and not (0 <= value <= 0xffff): # type: ignore
raise OverflowError("value out of range for word")
if datatype == DataType.FLOAT and not (FLOAT_MAX_NEGATIVE <= value <= FLOAT_MAX_POSITIVE): # type: ignore
raise OverflowError("value out of range for float")
if datatype in (DataType.BYTE, DataType.BYTEARRAY, DataType.MATRIX) and isinstance(value, str):
if len(value) == 1:
return True, char_to_bytevalue(value)

View File

@ -10,6 +10,7 @@ import subprocess
import datetime
import itertools
from typing import Union, TextIO, List, Tuple, Iterator
from .plylex import print_bold
from .plyparse import Module, ProgramFormat, Block, Directive, VarDef, Label, Subroutine, AstNode, ZpOptions
from .datatypes import VarType, DataType, to_hex, mflpt5_to_float, to_mflpt5, STRING_DATATYPES
@ -47,7 +48,17 @@ class AssemblyGenerator:
self.footer()
def sanitycheck(self):
# duplicate block names?
start_found = False
for block, parent in self.module.all_scopes():
for label in block.nodes:
if isinstance(label, Label) and label.name == "start" and block.name == "main":
start_found = True
break
if start_found:
break
if not start_found:
print_bold("ERROR: program entry point is missing ('start' label in 'main' block)\n")
raise SystemExit(1)
all_blocknames = [b.name for b in self.module.scope.filter_nodes(Block)]
unique_blocknames = set(all_blocknames)
if len(all_blocknames) != len(unique_blocknames):
@ -329,6 +340,7 @@ class AssemblyGenerator:
def generate_statement(self, stmt: AstNode) -> None:
if isinstance(stmt, Label):
self.p("\n{:s}\v\t\t; {:s}".format(stmt.name, stmt.lineref))
self.p("\vrts")
# @todo rest of the statement nodes

View File

@ -40,7 +40,7 @@ def main() -> None:
if args.nooptimize:
print_bold("not optimizing the parse tree!")
else:
print("\nOptimizing parse tree.")
print("\nOptimizing code.")
optimize(parsed_module)
print("\nGenerating assembly code.")
cg = AssemblyGenerator(parsed_module)

View File

@ -28,8 +28,7 @@ class Optimizer:
# and augmented assignments that have no effect (A+=0)
# @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false (or perhaps turn them into byte values first?)
for block, parent in self.module.all_scopes():
if block.scope:
for assignment in list(block.scope.nodes):
for assignment in list(block.nodes):
if isinstance(assignment, Assignment):
assignment.left = [lv for lv in assignment.left if lv != assignment.right]
if not assignment.left:
@ -43,18 +42,16 @@ class Optimizer:
print_warning("{}: removed statement that has no effect".format(assignment.sourceref))
block.scope.remove_node(assignment)
if assignment.right >= 8 and assignment.operator in ("<<=", ">>="):
self.num_warnings += 1
print_warning("{}: shifting result is always zero".format(assignment.sourceref))
print("{}: shifting result is always zero".format(assignment.sourceref))
new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref)
block.scope.replace_node(assignment, new_stmt)
def combine_assignments_into_multi(self):
# fold multiple consecutive assignments with the same rvalue into one multi-assignment
for block, parent in self.module.all_scopes():
if block.scope:
rvalue = None
assignments = []
for stmt in list(block.scope.nodes):
for stmt in list(block.nodes):
if isinstance(stmt, Assignment):
if assignments:
if stmt.right == rvalue:
@ -78,14 +75,12 @@ class Optimizer:
def optimize_multiassigns(self):
# optimize multi-assign statements (remove duplicate targets, optimize order)
for block, parent in self.module.all_scopes():
if block.scope:
for assignment in block.scope.nodes:
for assignment in block.nodes:
if isinstance(assignment, Assignment) and len(assignment.left) > 1:
# remove duplicates
lvalues = set(assignment.left)
if len(lvalues) != len(assignment.left):
self.num_warnings += 1
print_warning("{}: removed duplicate assignment targets".format(assignment.sourceref))
print("{}: removed duplicate assignment targets".format(assignment.sourceref))
# @todo change order: first registers, then zp addresses, then non-zp addresses, then the rest (if any)
assignment.left = list(lvalues)

View File

@ -61,6 +61,7 @@ tokens = (
"LOGICAND",
"LOGICOR",
"LOGICNOT",
"INTEGERDIVIDE",
"POWER",
"LABEL",
"IF",
@ -73,6 +74,7 @@ literals = ['+', '-', '*', '/', '(', ')', '[', ']', '{', '}', '.', ',', '!', '?'
# regex rules for simple tokens
t_INTEGERDIVIDE = r"//"
t_BITAND = r"&"
t_BITOR = r"\|"
t_BITXOR = r"\^"
@ -219,6 +221,12 @@ def t_LABEL(t):
return t
def t_BOOLEAN(t):
    r"\b(?:true|false)\b"
    # NOTE: the string above is the PLY token regex, not documentation.
    # The word boundaries keep identifiers that merely start with a boolean
    # keyword (e.g. 'trueish', 'false_flag') from being split into a BOOLEAN
    # token followed by a NAME token.
    # The matched text is converted to a real Python bool.
    t.value = t.value == "true"
    return t
def t_DOTTEDNAME(t):
    r"[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+"
    # NOTE: the string above is the PLY token regex, not documentation.
    # It matches two or more identifiers joined by dots (e.g. 'block.symbol');
    # the token value is passed through unchanged.
    return t

View File

@ -5,9 +5,12 @@ This is the parser of the IL65 code, that generates a parse tree.
Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0
"""
import math
import builtins
import inspect
import enum
from collections import defaultdict
from typing import Union, Generator, Tuple, List, Optional, Dict
from typing import Union, Generator, Tuple, List, Optional, Dict, Any, Iterable
import attr
from ply.yacc import yacc
from .plylex import SourceRef, tokens, lexer, find_tok_column
@ -26,15 +29,24 @@ class ZpOptions(enum.Enum):
CLOBBER_RESTORE = "clobber_restore"
# Lookup tables (name -> callable) of the functions that may be called inside
# constant expressions; they are invoked at compile time during constant folding.
math_functions = {name: func for name, func in vars(math).items() if inspect.isbuiltin(func)}
# NOTE(review): this exposes *every* builtin function, which includes eval, exec,
# open and __import__ -- IL65 source code can therefore run arbitrary Python at
# compile time. Consider replacing this with an explicit whitelist.
builtin_functions = {name: func for name, func in vars(builtins).items() if inspect.isbuiltin(func)}
class ParseError(Exception):
    """Error in the IL65 source code, tied to the source location that caused it."""

    def __init__(self, message: str, sourceref: 'SourceRef') -> None:
        super().__init__(message)
        self.sourceref = sourceref
        # @todo chain attribute, a list of other exceptions, so we can have more than 1 error at a time.

    def __str__(self):
        # rendered as "<source location> <message>"; the message must be a str
        where = format(self.sourceref)
        what = format(self.args[0], "s")
        return where + " " + what
class ExpressionEvaluationError(ParseError):
    """Raised when an expression cannot be evaluated to a constant value."""
start = "start"
@ -69,9 +81,9 @@ class AstNode:
tostr(elt, level + 2)
tostr(self, 0)
def process_expressions(self) -> None:
# process/simplify all expressions (constant folding etc) @todo
# @todo override in node types that have expression(s)
def process_expressions(self, scope: 'Scope') -> None:
# process/simplify all expressions (constant folding etc)
# this is implemented in node types that have expression(s) and that should act on this.
pass
@ -115,6 +127,7 @@ class Scope(AstNode):
node.scope.parent_scope = self
def __getitem__(self, name: str) -> AstNode:
assert isinstance(name, str)
if '.' in name:
# look up the dotted name starting from the topmost scope
scope = self
@ -166,13 +179,13 @@ class Scope(AstNode):
self._populate_symboltable(newnode)
def validate_address(object: AstNode, attrib: attr.Attribute, value: Optional[int]):
def validate_address(obj: AstNode, attrib: attr.Attribute, value: Optional[int]):
if value is None:
return
if isinstance(object, Block) and object.name == "ZP":
raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), object.sourceref)
if isinstance(obj, Block) and obj.name == "ZP":
raise ParseError("zeropage block cannot have custom start {:s}".format(attrib.name), obj.sourceref)
if value < 0x0200 or value > 0xffff:
raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), object.sourceref)
raise ParseError("invalid {:s} (must be from $0200 to $ffff)".format(attrib.name), obj.sourceref)
@attr.s(cmp=False, repr=False)
@ -185,6 +198,12 @@ class Block(AstNode):
def __attrs_post_init__(self):
self.scope.name = self.name
@property
def nodes(self) -> 'Iterable[AstNode]':
    # the nodes of this block's scope, or an empty list when it has no scope
    scope = self.scope
    return scope.nodes if scope else []
@property
def label(self) -> str:
if self.name:
@ -205,6 +224,12 @@ class Module(AstNode):
address = attr.ib(type=int, init=False, default=0xc000, validator=validate_address) # can be set via directive
zp_options = attr.ib(type=ZpOptions, init=False, default=ZpOptions.NOCLOBBER) # can be set via directive
@property
def nodes(self) -> 'Iterable[AstNode]':
    # a module without a scope simply has no nodes
    if not self.scope:
        return []
    return self.scope.nodes
def all_scopes(self) -> Generator[Tuple[AstNode, AstNode], None, None]:
# generator that recursively yields through the scopes (preorder traversal), yields (node, parent_node) tuples.
# it iterates over copies of the node collections, so it's okay to modify the scopes you iterate over.
@ -275,6 +300,9 @@ class Assignment(AstNode):
new_targets.append(t)
self.left = new_targets
def process_expressions(self, scope: Scope) -> None:
    """Simplify (constant-fold) the right-hand side of this assignment in place.

    The right-hand side is not always an AST node: after a previous folding pass
    (or when the optimizer creates an assignment with a literal value) it is a
    plain Python primitive without a ``sourceref``. In that case the source
    reference of the assignment itself is used for error reporting.
    """
    sourceref = self.right.sourceref if isinstance(self.right, AstNode) else self.sourceref
    self.right = process_expression(self.right, scope, sourceref)
@attr.s(cmp=False, repr=False)
class AugAssignment(AstNode):
@ -282,6 +310,9 @@ class AugAssignment(AstNode):
operator = attr.ib(type=str)
right = attr.ib()
def process_expressions(self, scope: Scope) -> None:
    """Simplify (constant-fold) the right-hand side of this augmented assignment in place.

    Falls back to this node's own source reference when the right-hand side is
    already a plain primitive value (which has no ``sourceref`` attribute).
    """
    sourceref = self.right.sourceref if isinstance(self.right, AstNode) else self.sourceref
    self.right = process_expression(self.right, scope, sourceref)
@attr.s(cmp=False, repr=False)
class SubCall(AstNode):
@ -292,6 +323,11 @@ class SubCall(AstNode):
def __attrs_post_init__(self):
self.arguments = self.arguments or []
def process_expressions(self, scope: Scope) -> None:
    # a call has no expression of its own; delegate to each of its arguments
    arguments = self.arguments
    for argument in arguments:
        assert isinstance(argument, CallArgument)
        argument.process_expressions(scope)
@attr.s(cmp=False, repr=False)
class Return(AstNode):
@ -299,6 +335,14 @@ class Return(AstNode):
value_X = attr.ib(default=None)
value_Y = attr.ib(default=None)
def process_expressions(self, scope: Scope) -> None:
    """Simplify (constant-fold) the A, X and Y return value expressions in place.

    Values that are None (not returned) are left alone. A value that is already
    a plain primitive (for instance after an earlier folding pass) has no
    ``sourceref``; this node's own source reference is used for it instead.
    """
    for attrname in ("value_A", "value_X", "value_Y"):
        value = getattr(self, attrname)
        if value is None:
            continue
        sourceref = value.sourceref if isinstance(value, AstNode) else self.sourceref
        setattr(self, attrname, process_expression(value, scope, sourceref))
@attr.s(cmp=False, repr=False)
class TargetRegisters(AstNode):
@ -347,12 +391,9 @@ class VarDef(AstNode):
self.value = 0
# note: value coercion is done later, when all expressions are evaluated
def process_expressions(self) -> None:
if isinstance(self.value, Expression):
# process/simplify all expressions (constant folding etc) # @todo
# verify that the expression yields a single constant value, replace value by that value # @todo
self.value = 123 # XXX
assert not isinstance(self.value, Expression)
def process_expressions(self, scope: Scope) -> None:
self.value = process_expression(self.value, scope, self.sourceref)
assert not isinstance(self.value, Expression), "processed expression for vardef should reduce to a constant value"
if self.vartype in (VarType.CONST, VarType.VAR):
try:
_, self.value = coerce_value(self.datatype, self.value, self.sourceref)
@ -388,6 +429,12 @@ class Subroutine(AstNode):
scope = attr.ib(type=Scope, default=None)
address = attr.ib(type=int, default=None, validator=validate_address)
@property
def nodes(self) -> 'Iterable[AstNode]':
    # a subroutine without a scope has no nodes
    own_scope = self.scope
    if own_scope:
        return own_scope.nodes
    return []
def __attrs_post_init__(self):
if self.scope and self.address is not None:
raise ValueError("subroutine must have either a scope or an address, not both")
@ -401,6 +448,10 @@ class Goto(AstNode):
if_stmt = attr.ib(default=None)
condition = attr.ib(default=None)
def process_expressions(self, scope: Scope) -> None:
    """Simplify (constant-fold) the condition of a conditional goto in place.

    An unconditional goto (condition is None) is left alone. When the condition
    is already a plain primitive value (no ``sourceref`` attribute), this node's
    own source reference is used for error reporting.
    """
    if self.condition is None:
        return
    sourceref = self.condition.sourceref if isinstance(self.condition, AstNode) else self.sourceref
    self.condition = process_expression(self.condition, scope, sourceref)
@attr.s(cmp=False, repr=False)
class Dereference(AstNode):
@ -420,6 +471,37 @@ class Dereference(AstNode):
self.datatype = self.datatype.to_enum()
@attr.s(cmp=False, repr=False)
class LiteralValue(AstNode):
    """A literal value as it appeared in the source code."""
    value = attr.ib()

    def __repr__(self) -> str:
        # shown exactly like the wrapped value
        return "{!r}".format(self.value)
@attr.s(cmp=False, repr=False)
class AddressOf(AstNode):
    """The address-of operator applied to a symbol (``&symbol``)."""
    # The parser stores a SymbolName node here (not a str), and constant
    # evaluation asserts exactly that, so the field is not declared as str.
    name = attr.ib()
@attr.s(cmp=False, repr=False)
class IncrDecr(AstNode):
    """An increment (++) or decrement (--) statement on a target."""
    target = attr.ib()
    operator = attr.ib(type=str, validator=attr.validators.in_(["++", "--"]))
    howmuch = attr.ib(default=1)

    def __attrs_post_init__(self):
        # normalise a negative amount: flip the sign and swap the operator,
        # so that the stored amount is always >= 0
        if not self.howmuch < 0:
            return
        self.howmuch = -self.howmuch
        flipped = "++" if self.operator == "--" else "--"
        self.operator = flipped
@attr.s(cmp=False, repr=False)
class SymbolName(AstNode):
    """A reference to a symbol by its (possibly dotted) name."""
    name = attr.ib(type=str)
@attr.s(cmp=False, slots=True, repr=False)
class CallTarget(AstNode):
target = attr.ib()
@ -431,11 +513,8 @@ class CallArgument(AstNode):
value = attr.ib()
name = attr.ib(type=str, default=None)
@attr.s(cmp=False, repr=False)
class UnaryOp(AstNode):
operator = attr.ib(type=str)
operand = attr.ib()
def process_expressions(self, scope: Scope) -> None:
    # fold the argument value; errors are reported against the argument itself
    folded = process_expression(self.value, scope, self.sourceref)
    self.value = folded
@attr.s(cmp=False, slots=True, repr=False)
@ -443,10 +522,187 @@ class Expression(AstNode):
left = attr.ib()
operator = attr.ib(type=str)
right = attr.ib()
unary = attr.ib(type=bool, default=False)
processed_must_be_constant = attr.ib(type=bool, init=False, default=False) # does the expression have to be a constant value?
processed = attr.ib(type=bool, init=False, default=False) # has this expression been processed/simplified yet?
constant = attr.ib(type=bool, init=False, default=False) # is the processed expression a constant value?
def __attrs_post_init__(self):
    # ++ and -- are separate statements, never operators inside an expression
    is_incr_decr = self.operator in ("++", "--")
    assert not is_incr_decr, "incr/decr should not be an expression"
def process_expressions(self, scope: 'Scope') -> None:
    # expressions are never processed directly: the node that owns the
    # expression is responsible for processing (and replacing) it
    message = "should be done via parent node's process_expressions"
    raise RuntimeError(message)
def evaluate_primitive_constants(self, scope: Scope) -> Union[int, float, str, bool]:
    """Evaluate this binary expression, whose operands must both be primitive constants.

    The operands are combined with a fixed table of operator functions rather
    than by building Python source text and calling eval() on it. That also
    makes non-finite floats work: their repr ('inf', 'nan') is not valid
    Python source.

    Raises:
        TypeError: an operand is not a literal or a primitive value.
        ValueError: the operator is not supported.
        ExpressionEvaluationError: the evaluation itself failed
            (division by zero, incompatible operand types, ...).
    """
    import operator as _operator    # local import: not imported at file level

    # make sure the lvalue and rvalue are primitives, and the operator is allowed
    if not isinstance(self.left, (LiteralValue, int, float, str, bool)):
        raise TypeError("left", self)
    if not isinstance(self.right, (LiteralValue, int, float, str, bool)):
        raise TypeError("right", self)
    binary_operators = {
        '+': _operator.add,
        '-': _operator.sub,
        '*': _operator.mul,
        '/': _operator.truediv,
        '//': _operator.floordiv,
        '<': _operator.lt,
        '>': _operator.gt,
        '<=': _operator.le,
        '>=': _operator.ge,
        '==': _operator.eq,
        '!=': _operator.ne,
    }
    if self.operator == '~':
        # '~' is unary only; as before, this is reported as an evaluation error
        raise ExpressionEvaluationError("expression error: '~' is not a binary operator", self.sourceref)
    if self.operator not in binary_operators:
        raise ValueError("operator", self)
    left = self.left.value if isinstance(self.left, LiteralValue) else self.left
    right = self.right.value if isinstance(self.right, LiteralValue) else self.right
    try:
        return binary_operators[self.operator](left, right)
    except Exception as x:
        raise ExpressionEvaluationError("expression error: " + str(x), self.sourceref) from None
def print_tree(self) -> None:
    # debugging aid: prints the expression tree, one operand/operator per line
    def render(node: Any, depth: int) -> str:
        pad = " " * depth
        if not isinstance(node, Expression):
            return pad + str(node) + "\n"
        deeper = depth + 1
        if node.unary:
            return pad + "{}{}".format(node.operator, render(node.left, deeper))
        pieces = [
            pad, render(node.left, deeper),
            pad, str(node.operator), "\n",
            pad, render(node.right, deeper),
        ]
        return "".join(pieces)

    print(render(self, 0))
def process_expression(value: Any, scope: Scope, sourceref: SourceRef) -> Any:
    """Process/simplify an expression (constant folding etc).

    An Expression flagged as 'must be constant' has to reduce to a single
    constant value; anything else is folded as far as possible and may remain
    a (partially simplified) expression.
    """
    if isinstance(value, Expression) and value.processed_must_be_constant:
        return process_constant_expression(value, sourceref, scope)
    return process_dynamic_expression(value, sourceref, scope)
def process_constant_expression(expr: Any, sourceref: 'SourceRef', symbolscope: 'Scope') -> Union[int, float, str, bool]:
    """Reduce *expr* to a single constant value (int, float, str, bool), or raise.

    Handles primitives and literals, symbol names (looked up in *symbolscope*),
    address-of, calls to math/builtin functions with constant arguments, and
    unary/binary expressions (whose operands are replaced in place by their
    constant values).

    Args:
        expr: the value or AST node to evaluate.
        sourceref: source location used when *expr* carries none itself.
        symbolscope: scope in which symbol names are resolved.

    Raises:
        ExpressionEvaluationError: the expression is valid but not constant
            (callers that fold dynamic expressions catch this and keep the
            expression as-is).
        ParseError: the expression is invalid.
        ValueError: invalid unary operator or unary operand type.
    """
    if expr is None or isinstance(expr, (int, float, str, bool)):
        return expr
    elif isinstance(expr, LiteralValue):
        return expr.value
    elif isinstance(expr, SymbolName):
        try:
            value = symbolscope[expr.name]
            if isinstance(value, VarDef):
                if value.vartype == VarType.MEMORY:
                    raise ExpressionEvaluationError("can't take a memory value, must be a constant", expr.sourceref)
                value = value.value
            if isinstance(value, Expression):
                raise ExpressionEvaluationError("circular reference?", expr.sourceref)
            elif isinstance(value, (int, float, str, bool)):
                return value
            else:
                raise ExpressionEvaluationError("constant symbol required, not {}".format(value.__class__.__name__), expr.sourceref)
        except LookupError as x:
            raise ExpressionEvaluationError(str(x), expr.sourceref) from None
    elif isinstance(expr, AddressOf):
        assert isinstance(expr.name, SymbolName)
        try:
            value = symbolscope[expr.name.name]
            if isinstance(value, VarDef):
                if value.vartype == VarType.MEMORY:
                    # the value of a memory-mapped variable is its address
                    return value.value
                raise ParseError("can't take the address of this {}".format(value.__class__.__name__), expr.name.sourceref)
            else:
                raise ExpressionEvaluationError("constant address required, not {}".format(value.__class__.__name__), expr.name.sourceref)
        except LookupError as x:
            raise ParseError(str(x), expr.sourceref) from None
    elif isinstance(expr, SubCall):
        if not isinstance(expr.target, CallTarget):
            raise ParseError("function name required, not {}".format(expr.target.__class__.__name__), expr.sourceref)
        target = expr.target.target
        # Check the kind of call target *before* reading its name: a target that
        # is not a symbol has no name. Such a call cannot be evaluated at compile
        # time; it is reported as an evaluation error (a ParseError subclass) so
        # that dynamic expression folding leaves the call untouched.
        if not isinstance(target, SymbolName):
            raise ExpressionEvaluationError("symbol name required, not {}".format(target.__class__.__name__), expr.sourceref)
        funcname = target.name
        if funcname not in math_functions and funcname not in builtin_functions:
            raise ExpressionEvaluationError("can only use math- or builtin function", expr.sourceref)
        # the evaluated arguments are always plain constant values
        func_args = [process_constant_expression(callarg.value, sourceref, symbolscope) for callarg in expr.arguments]
        # NOTE(review): builtin_functions is not a whitelist; it also contains eval, exec and open,
        # which this call will happily execute at compile time.
        func = math_functions.get(funcname, builtin_functions.get(funcname))
        try:
            return func(*func_args)
        except Exception as x:
            raise ExpressionEvaluationError(str(x), expr.sourceref) from None
    elif not isinstance(expr, Expression):
        raise ExpressionEvaluationError("constant value required, not {}".format(expr.__class__.__name__), expr.sourceref)
    if expr.unary:
        left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref
        expr.left = process_constant_expression(expr.left, left_sourceref, symbolscope)
        if isinstance(expr.left, (int, float)):
            try:
                if expr.operator == '-':
                    return -expr.left
                elif expr.operator == '~':
                    return ~expr.left  # type: ignore
                elif expr.operator in ("++", "--"):
                    raise ValueError("incr/decr should not be an expression")
                raise ValueError("invalid unary operator", expr.operator)
            except TypeError as x:
                # for instance bitwise invert of a float
                raise ParseError(str(x), expr.sourceref) from None
        raise ValueError("invalid operand type for unary operator", expr.left, expr.operator)
    else:
        left_sourceref = expr.left.sourceref if isinstance(expr.left, AstNode) else sourceref
        expr.left = process_constant_expression(expr.left, left_sourceref, symbolscope)
        right_sourceref = expr.right.sourceref if isinstance(expr.right, AstNode) else sourceref
        expr.right = process_constant_expression(expr.right, right_sourceref, symbolscope)
        if isinstance(expr.left, (LiteralValue, SymbolName, int, float, str, bool)):
            if isinstance(expr.right, (LiteralValue, SymbolName, int, float, str, bool)):
                return expr.evaluate_primitive_constants(symbolscope)
            else:
                raise ExpressionEvaluationError("constant value required on right, not {}"
                                                .format(expr.right.__class__.__name__), right_sourceref)
        else:
            raise ExpressionEvaluationError("constant value required on left, not {}"
                                            .format(expr.left.__class__.__name__), left_sourceref)
def process_dynamic_expression(expr: Any, sourceref: 'SourceRef', symbolscope: 'Scope') -> Any:
    """Constant-fold a dynamic expression as far as possible.

    Parts that evaluate to a constant are replaced by that constant; whatever
    cannot be evaluated at compile time is returned unchanged.
    """
    def fold_or_keep(node: Any) -> Any:
        # try to reduce the node to a constant value, otherwise keep it as it is
        try:
            return process_constant_expression(node, sourceref, symbolscope)
        except ExpressionEvaluationError:
            return node

    def sourceref_of(node: Any) -> Any:
        # AST nodes carry their own source location, primitives don't
        return node.sourceref if isinstance(node, AstNode) else sourceref

    if expr is None or isinstance(expr, (int, float, str, bool)):
        return expr
    if isinstance(expr, LiteralValue):
        return expr.value
    if isinstance(expr, (SymbolName, AddressOf, SubCall)):
        return fold_or_keep(expr)
    if isinstance(expr, Register):
        return expr
    if not isinstance(expr, Expression):
        raise ParseError("expression required, not {}".format(expr.__class__.__name__), expr.sourceref)
    # fold the operand(s) first, then attempt to fold the expression as a whole
    expr.left = process_dynamic_expression(expr.left, sourceref_of(expr.left), symbolscope)
    if not expr.unary:
        expr.right = process_dynamic_expression(expr.right, sourceref_of(expr.right), symbolscope)
    return fold_or_keep(expr)
# ----------------- PLY parser definition follows ----------------------
def p_start(p):
"""
@ -653,7 +909,7 @@ def p_literal_value(p):
| STRING
| CHARACTER
| BOOLEAN"""
p[0] = p[1]
p[0] = LiteralValue(value=p[1], sourceref=_token_sref(p, 1))
def p_subroutine(p):
@ -759,14 +1015,14 @@ def p_incrdecr(p):
incrdecr : assignment_target INCR
| assignment_target DECR
"""
p[0] = UnaryOp(operator=p[2], operand=p[1], sourceref=_token_sref(p, 1))
p[0] = IncrDecr(target=p[1], operator=p[2], sourceref=_token_sref(p, 1))
def p_call_subroutine(p):
"""
subroutine_call : calltarget preserveregs_opt '(' call_arguments_opt ')'
"""
p[0] = SubCall(target=p[1], preserve_regs=p[2], arguments=p[4], sourceref=_token_sref(p, 1))
p[0] = SubCall(target=p[1], preserve_regs=p[2], arguments=p[4], sourceref=_token_sref(p, 3))
def p_preserveregs_opt(p):
@ -894,7 +1150,7 @@ def p_symbolname(p):
symbolname : NAME
| DOTTEDNAME
"""
p[0] = p[1]
p[0] = SymbolName(name=p[1], sourceref=_token_sref(p, 1))
def p_assignment(p):
@ -914,7 +1170,7 @@ def p_aug_assignment(p):
precedence = (
('left', '+', '-'),
('left', '*', '/'),
('left', '*', '/', 'INTEGERDIVIDE'),
('right', 'UNARY_MINUS', 'BITINVERT', "UNARY_ADDRESSOF"),
('left', "LT", "GT", "LE", "GE", "EQUALS", "NOTEQUALS"),
('nonassoc', "COMMENT"),
@ -927,6 +1183,7 @@ def p_expression(p):
| expression '-' expression
| expression '*' expression
| expression '/' expression
| expression INTEGERDIVIDE expression
| expression LT expression
| expression GT expression
| expression LE expression
@ -941,21 +1198,21 @@ def p_expression_uminus(p):
"""
expression : '-' expression %prec UNARY_MINUS
"""
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
p[0] = Expression(left=p[2], operator=p[1], right=None, unary=True, sourceref=_token_sref(p, 1))
# PLY yacc action for the address-of form of ``expression``: a BITAND token followed by a symbolname.
# The docstring below IS the grammar rule and must stay verbatim; ``%prec UNARY_ADDRESSOF`` gives the
# rule the precedence of the UNARY_ADDRESSOF entry in the module's precedence table.
# Symbol positions: p[1] = BITAND token, p[2] = symbolname.
def p_expression_addressof(p):
"""
expression : BITAND symbolname %prec UNARY_ADDRESSOF
"""
# NOTE(review): this text is a diff excerpt without +/- markers; the two assignments below look
# like the before/after versions of a single statement. The first builds a generic UnaryOp node,
# the second a dedicated AddressOf node that keeps only the symbol (p[2]) and drops the operator.
# Read top to bottom, the AddressOf node is the value that remains in p[0].
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
p[0] = AddressOf(name=p[2], sourceref=_token_sref(p, 1))
# PLY yacc action for the bit-invert form of ``expression``: a BITINVERT token followed by an expression.
# The docstring below IS the grammar rule and must stay verbatim. Unlike the unary-minus and
# address-of rules it carries no ``%prec``; BITINVERT itself is listed in the precedence table.
# Symbol positions: p[1] = BITINVERT token, p[2] = the operand expression.
def p_unary_expression_bitinvert(p):
"""
expression : BITINVERT expression
"""
# NOTE(review): this text is a diff excerpt without +/- markers; the two assignments below look
# like the before/after versions of a single statement. The first builds a UnaryOp node, the
# second an Expression node flagged unary=True with the operand in ``left`` and ``right`` set to None.
# Read top to bottom, the Expression node is the value that remains in p[0].
p[0] = UnaryOp(operator=p[1], operand=p[2], sourceref=_token_sref(p, 1))
p[0] = Expression(left=p[2], operator=p[1], right=None, unary=True, sourceref=_token_sref(p, 1))
def p_expression_group(p):
@ -1012,7 +1269,10 @@ def p_error(p):
print('\n[ERROR DEBUG: parser state={:d} stack: {} . {} ]'.format(parser.state, stack_state_str, p))
if p:
sref = SourceRef(p.lexer.source_filename, p.lineno, find_tok_column(p))
p.lexer.error_function(sref, "syntax error before '{:.20s}'", str(p.value))
if p.value in ("", "\n"):
p.lexer.error_function(sref, "syntax error before end of line")
else:
p.lexer.error_function(sref, "syntax error before '{:.20s}'", str(p.value).rstrip())
else:
lexer.error_function(None, "syntax error at end of input", lexer.source_filename)

View File

@ -1,7 +1,7 @@
IL65 / 'Sick' - Experimental Programming Language for 8-bit 6502/6510 microprocessors
=====================================================================================
*Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0*
*Written by Irmen de Jong (irmen@razorvine.net)*
*Software license: GNU GPL 3.0, see file LICENSE*
@ -17,6 +17,7 @@ which aims to provide many conveniences over raw assembly code (even when using
- subroutines have enforced input- and output parameter definitions
- various data types other than just bytes (16-bit words, floats, strings, 16-bit register pairs)
- automatic variable allocations, automatic string variables and string sharing
- constant folding in expressions (compile-time evaluation)
- automatic type conversions
- floating point operations
- optional automatic preservation and restoration of the CPU register state when calling routines that would otherwise clobber it
@ -24,6 +25,7 @@ which aims to provide many conveniences over raw assembly code (even when using
- breakpoints, that let the Vice emulator drop into the monitor if execution hits them
- source code labels automatically loaded in Vice emulator so it can show them in disassembly
- conditional gotos
- some code optimizations (such as not repeatedly loading the same value in a register)
- @todo: loops
- @todo: memory block operations

View File

@ -1,5 +1,5 @@
from il65.plylex import lexer, tokens, find_tok_column, literals, reserved
from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return
from il65.plylex import lexer, tokens, find_tok_column, literals, reserved, SourceRef
from il65.plyparse import parser, TokenFilter, Module, Subroutine, Block, Return, Scope, VarDef, Expression, LiteralValue, Label
def test_lexer_definitions():
@ -26,6 +26,7 @@ test_source = """ %output prg, sys
; comment
var foo = 42+true
var .matrix(20,30) m = 9.234556
;comment2
@ -47,6 +48,7 @@ def test_lexer():
assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL', 'ENDL',
'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL', 'ENDL',
'VARTYPE', 'NAME', 'IS', 'INTEGER', '+', 'BOOLEAN', 'ENDL',
'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL', 'ENDL',
'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL', 'ENDL', 'ENDL', 'ENDL',
'}', 'ENDL']
@ -56,6 +58,10 @@ def test_lexer():
assert directive_token.lineno == 9
assert directive_token.lexpos == lexer.lexdata.index("%import")
assert find_tok_column(directive_token) == 10
bool_token = tokens[23]
assert bool_token.type == "BOOLEAN"
assert type(bool_token.value) is bool
assert bool_token.value == True
def test_tokenfilter():
@ -72,6 +78,7 @@ def test_tokenfilter():
assert token_types == ['DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
'BITINVERT', 'NAME', 'INTEGER', '{', 'ENDL',
'DIRECTIVE', 'NAME', ',', 'NAME', 'ENDL',
'VARTYPE', 'NAME', 'IS', 'INTEGER', '+', 'BOOLEAN', 'ENDL',
'VARTYPE', 'DATATYPE', '(', 'INTEGER', ',', 'INTEGER', ')', 'NAME', 'IS', 'FLOATINGPOINT', 'ENDL',
'SUB', 'NAME', '(', ')', 'RARROW', '(', ')', '{', 'ENDL', 'RETURN', 'ENDL', '}', 'ENDL',
'}', 'ENDL']
@ -93,10 +100,17 @@ def test_parser():
block = result.scope["block"]
assert isinstance(block, Block)
assert block.name == "block"
assert block.nodes is block.scope.nodes
bool_vdef = block.scope.nodes[1]
assert isinstance(bool_vdef, VarDef)
assert isinstance(bool_vdef.value, Expression)
assert isinstance(bool_vdef.value.right, LiteralValue)
assert isinstance(bool_vdef.value.right.value, bool)
assert bool_vdef.value.right.value == True
assert block.address == 49152
sub2 = block.scope["calculate"]
assert sub2 is sub
assert sub2.lineref == "src l. 18"
assert sub2.lineref == "src l. 19"
all_scopes = list(result.all_scopes())
assert len(all_scopes) == 3
assert isinstance(all_scopes[0][0], Module)
@ -108,4 +122,16 @@ def test_parser():
stmt = list(all_scopes[2][0].scope.filter_nodes(Return))
assert len(stmt) == 1
assert isinstance(stmt[0], Return)
assert stmt[0].lineref == "src l. 19"
assert stmt[0].lineref == "src l. 20"
def test_block_nodes():
    """Check how ``nodes`` relates to ``scope`` on subroutines with and without a body."""
    where = SourceRef("file", 1, 1)
    by_address = Subroutine(name="subaddr", param_spec=[], result_spec=[], address=0xc000, sourceref=where)
    body = Scope(nodes=[Label(name="start", sourceref=where)], sourceref=where)
    with_body = Subroutine(name="subblock", param_spec=[], result_spec=[], scope=body, sourceref=where)

    # A subroutine defined only by an address has no scope and exposes an empty node list.
    assert by_address.scope is None
    assert by_address.nodes == []

    # A subroutine with a body exposes the very same list object as its scope.
    assert with_body.scope is not None
    assert len(with_body.scope.nodes) > 0
    assert with_body.nodes is with_body.scope.nodes

View File

@ -10,14 +10,18 @@
%saveregisters true
const num = 2
var var1 =2
var .word wvar1 = 2 + foo() ; @todo constant check error
const num = 2 + max(2, 8, 3.44//3)
const pi_val = 22/7 - 2.23423
var var1 =2 + 9/4
var .word wvar2 = 2 + cos(23.2)
memory memvar = $d020
var .word test2b = &memvar
var test3 = var1
start:
wvar1 = 2+foo()
wvar1 = 2+foo()+emptysub2
A=math.randbyte()
A += c64.RASTER
@ -212,11 +216,11 @@ sub emptysub () -> () {
%saveregisters
%breakpoint
return
return 999990 + (2*sin(1.0)) + foo(), 999990 -1, 999999
}
~ {
;sdfsdf
return
return 999, -1, 3.445
;sdfsdf
}