From 10d0dbe80bdd7ba98d77b9d405376df467aed844 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sun, 14 Jan 2018 18:02:39 +0100 Subject: [PATCH] improved sourceref column calculation when dealing with tabs, added more error checks --- il65/compile.py | 18 ++- il65/emit/assignment.py | 18 ++- il65/emit/calls.py | 1 - il65/emit/generate.py | 11 +- il65/emit/incrdecr.py | 271 +++++++++++++++++++--------------------- il65/optimize.py | 11 +- il65/plylex.py | 18 ++- il65/plyparse.py | 37 +++--- reference.md | 9 +- tests/test_parser.py | 26 ++++ todo.ill | 22 ++++ 11 files changed, 266 insertions(+), 176 deletions(-) diff --git a/il65/compile.py b/il65/compile.py index 0a4e216e9..534cf1ab9 100644 --- a/il65/compile.py +++ b/il65/compile.py @@ -13,7 +13,7 @@ from typing import Optional, Tuple, Set, Dict, List, Any, no_type_check import attr from .plyparse import parse_file, ParseError, Module, Directive, Block, Subroutine, Scope, VarDef, LiteralValue, \ SubCall, Goto, Return, Assignment, InlineAssembly, Register, Expression, ProgramFormat, ZpOptions,\ - SymbolName, Dereference, AddressOf + SymbolName, Dereference, AddressOf, IncrDecr, TargetRegisters from .plylex import SourceRef, print_bold from .optimize import optimize from .datatypes import DataType, VarType @@ -42,6 +42,7 @@ class PlyParser: # these shall only be done on the main module after all imports have been done: self.apply_directive_options(module) self.determine_subroutine_usage(module) + self.semantic_check(module) self.allocate_zeropage_vars(module) except ParseError as x: self.handle_parse_error(x) @@ -54,6 +55,18 @@ class PlyParser: self.parse_errors += 1 print_bold("ERROR: {}: {}".format(sourceref, fmtstring.format(*args))) + def semantic_check(self, module: Module) -> None: + # perform semantic analysis / checks on the syntactic parse tree we have so far + for block, parent in module.all_scopes(): + assert isinstance(block, (Module, Block, Subroutine)) + assert parent is None or isinstance(parent, (Module, Block, Subroutine)) + for stmt in block.nodes: + if isinstance(stmt, IncrDecr): + if isinstance(stmt.target, SymbolName): + symdef = block.scope[stmt.target.name] + if isinstance(symdef, VarDef) and symdef.vartype == VarType.CONST: + raise ParseError("cannot modify a constant", stmt.sourceref) + def check_and_merge_zeropages(self, module: Module) -> None: # merge all ZP blocks into one zeropage = None @@ -126,7 +139,6 @@ class PlyParser: if isinstance(node.right, Assignment): multi = reduce_right(node) assert multi is node and len(multi.left) > 1 and not isinstance(multi.right, Assignment) - node.simplify_targetregisters() def apply_directive_options(self, module: Module) -> None: def set_save_registers(scope: Scope, save_dir: Directive) -> None: @@ -392,7 +404,7 @@ class PlyParser: print("Error:", str(exc), file=sys.stderr) sourcetext = linecache.getline(exc.sourceref.file, exc.sourceref.line).rstrip() if sourcetext: - print(" " + sourcetext.expandtabs(1), file=sys.stderr) + print(" " + sourcetext.expandtabs(8), file=sys.stderr) if exc.sourceref.column: print(' ' * (1+exc.sourceref.column) + '^', file=sys.stderr) if sys.stderr.isatty(): diff --git a/il65/emit/assignment.py b/il65/emit/assignment.py index 403df6f94..5dcbe9e95 100644 --- a/il65/emit/assignment.py +++ b/il65/emit/assignment.py @@ -6,7 +6,23 @@ Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from typing import Callable -from ..plyparse import LiteralValue, Assignment, AugAssignment +from ..plyparse import AstNode, Scope, VarDef, Dereference, Register, TargetRegisters,\ + LiteralValue, Assignment, AugAssignment +from ..datatypes import DataType +from ..plyparse import SymbolName + + +def datatype_of(assignmenttarget: AstNode, scope: Scope) -> DataType: + if isinstance(assignmenttarget, (VarDef, Dereference, Register)): + return assignmenttarget.datatype + elif isinstance(assignmenttarget, SymbolName): + symdef = scope[assignmenttarget.name] + if isinstance(symdef, VarDef): + return symdef.datatype + elif isinstance(assignmenttarget, TargetRegisters): + if len(assignmenttarget.registers) == 1: + return datatype_of(assignmenttarget.registers[0], scope) + raise TypeError("cannot determine datatype", assignmenttarget) def generate_assignment(out: Callable, stmt: Assignment) -> None: diff --git a/il65/emit/calls.py b/il65/emit/calls.py index a4c77853e..b188d20f5 100644 --- a/il65/emit/calls.py +++ b/il65/emit/calls.py @@ -15,4 +15,3 @@ def generate_goto(out: Callable, stmt: Goto) -> None: def generate_subcall(out: Callable, stmt: SubCall) -> None: pass # @todo - diff --git a/il65/emit/generate.py b/il65/emit/generate.py index cdb20ab0f..cd854595d 100644 --- a/il65/emit/generate.py +++ b/il65/emit/generate.py @@ -9,7 +9,7 @@ import os import datetime from typing import TextIO, Callable from ..plylex import print_bold -from ..plyparse import Module, ProgramFormat, Block, Directive, VarDef, Label, Subroutine, AstNode, ZpOptions, \ +from ..plyparse import Module, Scope, ProgramFormat, Block, Directive, VarDef, Label, Subroutine, AstNode, ZpOptions, \ InlineAssembly, Return, Register, Goto, SubCall, Assignment, AugAssignment, IncrDecr from . import CodeError, to_hex from .variables import generate_block_init, generate_block_vars @@ -160,7 +160,7 @@ class AssemblyGenerator: for stmt in block.scope.nodes: if isinstance(stmt, (VarDef, Subroutine)): continue # should have been handled already or will be later - self.generate_statement(out, stmt) + self.generate_statement(out, stmt, block.scope) if block.name == "main" and isinstance(stmt, Label) and stmt.name == "start": # make sure the main.start routine clears the decimal and carry flags as first steps out("\vcld\n\vclc\n\vclv") @@ -177,15 +177,14 @@ class AssemblyGenerator: out("\v; params: {}\n\v; returns: {} clobbers: {}".format(params or "-", returns or "-", clobbers or "-")) cur_block = self.cur_block self.cur_block = subdef.scope - print(subdef.scope.nodes) for stmt in subdef.scope.nodes: - self.generate_statement(out, stmt) + self.generate_statement(out, stmt, subdef.scope) self.cur_block = cur_block out("") out("; -- end block subroutines") out("\n\v.pend\n") - def generate_statement(self, out: Callable, stmt: AstNode) -> None: + def generate_statement(self, out: Callable, stmt: AstNode, scope: Scope) -> None: if isinstance(stmt, Label): out("\n{:s}\v\t\t; {:s}".format(stmt.name, stmt.lineref)) elif isinstance(stmt, Return): @@ -207,7 +206,7 @@ class AssemblyGenerator: out(stmt.assembly) out("\v; end inline asm, " + stmt.lineref + "\n") elif isinstance(stmt, IncrDecr): - generate_incrdecr(out, stmt) + generate_incrdecr(out, stmt, scope) elif isinstance(stmt, Goto): generate_goto(out, stmt) elif isinstance(stmt, SubCall): diff --git a/il65/emit/incrdecr.py b/il65/emit/incrdecr.py index 8c658ae0b..036753beb 100644 --- a/il65/emit/incrdecr.py +++ b/il65/emit/incrdecr.py @@ -6,217 +6,206 @@ Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ from typing import Callable -from ..plyparse import Scope, AstNode, Register, IncrDecr, TargetRegisters, SymbolName, Dereference +from ..plyparse import Scope, VarType, VarDef, Register, TargetRegisters, IncrDecr, SymbolName, Dereference from ..datatypes import DataType, REGISTER_BYTES from . import CodeError, to_hex, preserving_registers +from .assignment import datatype_of -def datatype_of(node: AstNode, scope: Scope) -> DataType: - if isinstance(node, (Dereference, Register)): - return node.datatype - if isinstance(node, SymbolName): - symdef = scope[node.name] - - raise TypeError("cannot determine datatype", node) - - -def generate_incrdecr(out: Callable, stmt: IncrDecr) -> None: +def generate_incrdecr(out: Callable, stmt: IncrDecr, scope: Scope) -> None: assert isinstance(stmt.howmuch, (int, float)) and stmt.howmuch >= 0 assert stmt.operator in ("++", "--") - target = stmt.target - if isinstance(target, TargetRegisters): - if len(target.registers) != 1: - raise CodeError("incr/decr can operate on one register at a time only") - target = target[0] - # target = Register/SymbolName/Dereference + target = stmt.target # one of Register/SymbolName/Dereference + if isinstance(target, SymbolName): + symdef = scope[target.name] + if isinstance(symdef, VarDef): + target = symdef + else: + raise CodeError("cannot incr/decr this", symdef) if stmt.howmuch > 255: - if isinstance(stmt.target, TargetRegisters) - if stmt.what.datatype != DataType.FLOAT and not stmt.value.name and stmt.value.value > 0xff: - raise CodeError("only supports integer incr/decr by up to 255 for now") # XXX - howmuch = stmt.value.value - value_str = stmt.value.name or str(howmuch) - if isinstance(stmt.what, RegisterValue): - reg = stmt.what.register + if datatype_of(target, scope) != DataType.FLOAT: + raise CodeError("only supports integer incr/decr by up to 255 for now") + howmuch_str = str(stmt.howmuch) + + if isinstance(target, Register): + reg = target.name # note: these operations below are all checked to be ok if stmt.operator == "++": if reg == 'A': # a += 1..255 - out("\t\tclc") - out("\t\tadc #" + value_str) + out("\vclc") + out("\vadc #" + howmuch_str) elif reg in REGISTER_BYTES: - if howmuch == 1: + if stmt.howmuch == 1: # x/y += 1 - out("\t\tin{:s}".format(reg.lower())) + out("\vin{:s}".format(reg.lower())) else: # x/y += 2..255 - with preserving_registers({'A'}): - out("\t\tt{:s}a".format(reg.lower())) - out("\t\tclc") - out("\t\tadc #" + value_str) - out("\t\tta{:s}".format(reg.lower())) + with preserving_registers({'A'}, scope, out): + out("\vt{:s}a".format(reg.lower())) + out("\vclc") + out("\vadc #" + howmuch_str) + out("\vta{:s}".format(reg.lower())) elif reg == "AX": # AX += 1..255 - out("\t\tclc") - out("\t\tadc #" + value_str) - out("\t\tbcc +") - out("\t\tinx") + out("\vclc") + out("\vadc #" + howmuch_str) + out("\vbcc +") + out("\vinx") out("+") elif reg == "AY": # AY += 1..255 - out("\t\tclc") - out("\t\tadc # " + value_str) - out("\t\tbcc +") - out("\t\tiny") + out("\vclc") + out("\vadc # " + howmuch_str) + out("\vbcc +") + out("\viny") out("+") elif reg == "XY": - if howmuch == 1: + if stmt.howmuch == 1: # XY += 1 - out("\t\tinx") - out("\t\tbne +") - out("\t\tiny") + out("\vinx") + out("\vbne +") + out("\viny") out("+") else: # XY += 2..255 - with preserving_registers({'A'}): - out("\t\ttxa") - out("\t\tclc") - out("\t\tadc #" + value_str) - out("\t\ttax") - out("\t\tbcc +") - out("\t\tiny") + with preserving_registers({'A'}, scope, out): + out("\vtxa") + out("\vclc") + out("\vadc #" + howmuch_str) + out("\vtax") + out("\vbcc +") + out("\viny") out("+") else: raise CodeError("invalid incr register: " + reg) else: if reg == 'A': # a -= 1..255 - out("\t\tsec") - out("\t\tsbc #" + value_str) + out("\vsec") + out("\vsbc #" + howmuch_str) elif reg in REGISTER_BYTES: - if howmuch == 1: + if stmt.howmuch == 1: # x/y -= 1 - out("\t\tde{:s}".format(reg.lower())) + out("\vde{:s}".format(reg.lower())) else: # x/y -= 2..255 - with preserving_registers({'A'}): - out("\t\tt{:s}a".format(reg.lower())) - out("\t\tsec") - out("\t\tsbc #" + value_str) - out("\t\tta{:s}".format(reg.lower())) + with preserving_registers({'A'}, scope, out): + out("\vt{:s}a".format(reg.lower())) + out("\vsec") + out("\vsbc #" + howmuch_str) + out("\vta{:s}".format(reg.lower())) elif reg == "AX": # AX -= 1..255 - out("\t\tsec") - out("\t\tsbc #" + value_str) - out("\t\tbcs +") - out("\t\tdex") + out("\vsec") + out("\vsbc #" + howmuch_str) + out("\vbcs +") + out("\vdex") out("+") elif reg == "AY": # AY -= 1..255 - out("\t\tsec") - out("\t\tsbc #" + value_str) - out("\t\tbcs +") - out("\t\tdey") + out("\vsec") + out("\vsbc #" + howmuch_str) + out("\vbcs +") + out("\vdey") out("+") elif reg == "XY": - if howmuch == 1: + if stmt.howmuch == 1: # XY -= 1 - out("\t\tcpx #0") - out("\t\tbne +") - out("\t\tdey") + out("\vcpx #0") + out("\vbne +") + out("\vdey") out("+\t\tdex") else: # XY -= 2..255 - with preserving_registers({'A'}): - out("\t\ttxa") - out("\t\tsec") - out("\t\tsbc #" + value_str) - out("\t\ttax") - out("\t\tbcs +") - out("\t\tdey") + with preserving_registers({'A'}, scope, out): + out("\vtxa") + out("\vsec") + out("\vsbc #" + howmuch_str) + out("\vtax") + out("\vbcs +") + out("\vdey") out("+") else: raise CodeError("invalid decr register: " + reg) - elif isinstance(stmt.what, (MemMappedValue, IndirectValue)): - what = stmt.what - if isinstance(what, IndirectValue): - if isinstance(what.value, IntegerValue): - what_str = what.value.name or to_hex(what.value.value) + + elif isinstance(target, VarDef): + if target.vartype == VarType.CONST: + raise CodeError("cannot modify a constant", target) + what_str = target.name + if target.datatype == DataType.BYTE: + if stmt.howmuch == 1: + out("\v{:s} {:s}".format("inc" if stmt.operator == "++" else "dec", what_str)) else: - raise CodeError("invalid incr indirect type", what.value) - else: - what_str = what.name or to_hex(what.address) - if what.datatype == DataType.BYTE: - if howmuch == 1: - out("\t\t{:s} {:s}".format("inc" if stmt.operator == "++" else "dec", what_str)) - else: - with preserving_registers({'A'}): - out("\t\tlda " + what_str) + with preserving_registers({'A'}, scope, out): + out("\vlda " + what_str) if stmt.operator == "++": - out("\t\tclc") - out("\t\tadc #" + value_str) + out("\vclc") + out("\vadc #" + howmuch_str) else: - out("\t\tsec") - out("\t\tsbc #" + value_str) - out("\t\tsta " + what_str) - elif what.datatype == DataType.WORD: - if howmuch == 1: + out("\vsec") + out("\vsbc #" + howmuch_str) + out("\vsta " + what_str) + elif target.datatype == DataType.WORD: + if stmt.howmuch == 1: # mem.word +=/-= 1 if stmt.operator == "++": - out("\t\tinc " + what_str) - out("\t\tbne +") - out("\t\tinc {:s}+1".format(what_str)) + out("\vinc " + what_str) + out("\vbne +") + out("\vinc {:s}+1".format(what_str)) out("+") else: - with preserving_registers({'A'}): - out("\t\tlda " + what_str) - out("\t\tbne +") - out("\t\tdec {:s}+1".format(what_str)) + with preserving_registers({'A'}, scope, out): + out("\vlda " + what_str) + out("\vbne +") + out("\vdec {:s}+1".format(what_str)) out("+\t\tdec " + what_str) else: # mem.word +=/-= 2..255 if stmt.operator == "++": - with preserving_registers({'A'}): - out("\t\tclc") - out("\t\tlda " + what_str) - out("\t\tadc #" + value_str) - out("\t\tsta " + what_str) - out("\t\tbcc +") - out("\t\tinc {:s}+1".format(what_str)) + with preserving_registers({'A'}, scope, out): + out("\vclc") + out("\vlda " + what_str) + out("\vadc #" + howmuch_str) + out("\vsta " + what_str) + out("\vbcc +") + out("\vinc {:s}+1".format(what_str)) out("+") else: - with preserving_registers({'A'}): - out("\t\tsec") - out("\t\tlda " + what_str) - out("\t\tsbc #" + value_str) - out("\t\tsta " + what_str) - out("\t\tbcs +") - out("\t\tdec {:s}+1".format(what_str)) + with preserving_registers({'A'}, scope, out): + out("\vsec") + out("\vlda " + what_str) + out("\vsbc #" + howmuch_str) + out("\vsta " + what_str) + out("\vbcs +") + out("\vdec {:s}+1".format(what_str)) out("+") - elif what.datatype == DataType.FLOAT: - if howmuch == 1.0: + elif target.datatype == DataType.FLOAT: + if stmt.howmuch == 1.0: # special case for +/-1 - with preserving_registers({'A', 'X', 'Y'}, loads_a_within=True): - out("\t\tldx #<" + what_str) - out("\t\tldy #>" + what_str) + with preserving_registers({'A', 'X', 'Y'}, scope, out, loads_a_within=True): + out("\vldx #<" + what_str) + out("\vldy #>" + what_str) if stmt.operator == "++": - out("\t\tjsr c64flt.float_add_one") + out("\vjsr c64flt.float_add_one") else: - out("\t\tjsr c64flt.float_sub_one") - elif stmt.value.name: - with preserving_registers({'A', 'X', 'Y'}, loads_a_within=True): - out("\t\tlda #<" + stmt.value.name) - out("\t\tsta c64.SCRATCH_ZPWORD1") - out("\t\tlda #>" + stmt.value.name) - out("\t\tsta c64.SCRATCH_ZPWORD1+1") - out("\t\tldx #<" + what_str) - out("\t\tldy #>" + what_str) + out("\vjsr c64flt.float_sub_one") + elif stmt.value.name: # XXX + with preserving_registers({'A', 'X', 'Y'}, scope, out, loads_a_within=True): + out("\vlda #<" + stmt.value.name) + out("\vsta c64.SCRATCH_ZPWORD1") + out("\vlda #>" + stmt.value.name) + out("\vsta c64.SCRATCH_ZPWORD1+1") + out("\vldx #<" + what_str) + out("\vldy #>" + what_str) if stmt.operator == "++": - out("\t\tjsr c64flt.float_add_SW1_to_XY") + out("\vjsr c64flt.float_add_SW1_to_XY") else: - out("\t\tjsr c64flt.float_sub_SW1_from_XY") + out("\vjsr c64flt.float_sub_SW1_from_XY") else: raise CodeError("incr/decr missing float constant definition") else: - raise CodeError("cannot in/decrement memory of type " + str(what.datatype), howmuch) + raise CodeError("cannot in/decrement memory of type " + str(target.datatype), stmt.howmuch) + else: - raise CodeError("cannot in/decrement " + str(stmt.what)) + raise CodeError("cannot in/decrement", target) diff --git a/il65/optimize.py b/il65/optimize.py index cac4518a8..34cbeca4e 100644 --- a/il65/optimize.py +++ b/il65/optimize.py @@ -5,7 +5,7 @@ This is the optimizer that applies various optimizations to the parse tree. Written by Irmen de Jong (irmen@razorvine.net) - license: GNU GPL 3.0 """ -from .plyparse import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression +from .plyparse import Module, Subroutine, Block, Directive, Assignment, AugAssignment, Goto, Expression, IncrDecr from .plylex import print_warning, print_bold @@ -16,16 +16,17 @@ class Optimizer: def optimize(self) -> None: self.num_warnings = 0 - self.remove_useless_assigns() + self.optimize_assignments() self.combine_assignments_into_multi() self.optimize_multiassigns() self.remove_unused_subroutines() self.optimize_compare_with_zero() self.remove_empty_blocks() - def remove_useless_assigns(self): + def optimize_assignments(self): # remove assignment statements that do nothing (A=A) # and augmented assignments that have no effect (A+=0) + # convert augmented assignments to simple incr/decr if possible (A+=10 => A++ by 10) # @todo remove or simplify logical aug assigns like A |= 0, A |= true, A |= false (or perhaps turn them into byte values first?) for block, parent in self.module.all_scopes(): for assignment in list(block.nodes): @@ -45,6 +46,10 @@ class Optimizer: print("{}: shifting result is always zero".format(assignment.sourceref)) new_stmt = Assignment(left=[assignment.left], right=0, sourceref=assignment.sourceref) block.scope.replace_node(assignment, new_stmt) + if assignment.operator in ("+=", "-=") and 0 < assignment.right < 256: + new_stmt = IncrDecr(target=assignment.left, operator="++" if assignment.operator == "+=" else "--", + howmuch=assignment.right, sourceref=assignment.sourceref) + block.scope.replace_node(assignment, new_stmt) def combine_assignments_into_multi(self): # fold multiple consecutive assignments with the same rvalue into one multi-assignment diff --git a/il65/plylex.py b/il65/plylex.py index d76c006c4..10d5a23d8 100644 --- a/il65/plylex.py +++ b/il65/plylex.py @@ -230,6 +230,10 @@ def t_BOOLEAN(t): def t_DOTTEDNAME(t): r"[a-zA-Z_]\w*(\.[a-zA-Z_]\w*)+" + first, second = t.value.split(".") + if first in reserved or second in reserved: + custom_error(t, "reserved word as part of dotted name") + return None return t @@ -321,10 +325,22 @@ def t_error(t): t.lexer.skip(1) +def custom_error(t, message): + line, col = t.lineno, find_tok_column(t) + filename = getattr(t.lexer, "source_filename", "") + sref = SourceRef(filename, line, col) + if hasattr(t.lexer, "error_function"): + t.lexer.error_function(sref, message) + else: + print(sref, message, file=sys.stderr) + t.lexer.skip(1) + + def find_tok_column(token): """ Find the column of the token in its line.""" last_cr = lexer.lexdata.rfind('\n', 0, token.lexpos) - return token.lexpos - last_cr + chunk = lexer.lexdata[last_cr:token.lexpos] + return len(chunk.expandtabs()) def print_warning(text: str, sourceref: SourceRef = None) -> None: diff --git a/il65/plyparse.py b/il65/plyparse.py index 1a4b8f534..244afbe4d 100644 --- a/il65/plyparse.py +++ b/il65/plyparse.py @@ -101,7 +101,7 @@ class Scope(AstNode): symbols = attr.ib(init=False) name = attr.ib(init=False) # will be set by enclosing block, or subroutine etc. parent_scope = attr.ib(init=False, default=None) # will be wired up later - save_registers = attr.ib(type=bool, default=None, init=False) # None = look in parent scope's setting # @todo property that does that + save_registers = attr.ib(type=bool, default=None, init=False) # None = look in parent scope's setting @todo property that does that def __attrs_post_init__(self): # populate the symbol table for this scope for fast lookups via scope["name"] or scope["dotted.name"] @@ -319,19 +319,6 @@ class Assignment(AstNode): left = attr.ib(type=list) # type: List[Union[str, TargetRegisters, Dereference]] right = attr.ib() - def __attrs_post_init__(self): - self.simplify_targetregisters() - - def simplify_targetregisters(self) -> None: - # optimize TargetRegisters down to single Register if it's just one register - new_targets = [] - assert isinstance(self.left, (list, tuple)), "assignment lvalue must be sequence" - for t in self.left: - if isinstance(t, TargetRegisters) and len(t.registers) == 1: - t = t.registers[0] - new_targets.append(t) - self.left = new_targets - def process_expressions(self, scope: Scope) -> None: self.right = process_expression(self.right, scope, self.right.sourceref) @@ -393,6 +380,10 @@ class Return(AstNode): @attr.s(cmp=False, repr=False) class TargetRegisters(AstNode): + # This is a tuple of 1 or more registers. + # In it's multiple-register form it is only used to be able to parse + # the result of a subroutine call such as A,X = sub(). + # It will be replaced by a regular Register node if it contains just one register. registers = attr.ib(type=list) def add(self, register: str) -> None: @@ -527,6 +518,8 @@ class Dereference(AstNode): elif isinstance(self.datatype, DatatypeNode): assert self.size is None self.size = self.datatype.dimensions + if not self.datatype.to_enum().isnumeric(): + raise ParseError("dereference target value must be byte, word, float", self.datatype.sourceref) self.datatype = self.datatype.to_enum() @@ -545,6 +538,7 @@ class AddressOf(AstNode): @attr.s(cmp=False, repr=False) class IncrDecr(AstNode): + # increment or decrement something by a constant value (1 or more) target = attr.ib() operator = attr.ib(type=str, validator=attr.validators.in_(["++", "--"])) howmuch = attr.ib(default=1) @@ -554,6 +548,11 @@ class IncrDecr(AstNode): if self.howmuch < 0: self.howmuch = -self.howmuch self.operator = "++" if self.operator == "--" else "--" + if isinstance(self.target, Register): + if self.target.name not in REGISTER_BYTES | REGISTER_WORDS: + raise ParseError("cannot incr/decr that register", self.sourceref) + if isinstance(self.target, TargetRegisters): + raise ParseError("cannot incr/decr multiple registers at once", self.sourceref) @attr.s(cmp=False, repr=False) @@ -1087,7 +1086,7 @@ def p_incrdecr(p): incrdecr : assignment_target INCR | assignment_target DECR """ - p[0] = IncrDecr(target=p[1], operator=p[2], sourceref=_token_sref(p, 1)) + p[0] = IncrDecr(target=p[1], operator=p[2], sourceref=_token_sref(p, 2)) def p_call_subroutine(p): @@ -1316,6 +1315,11 @@ def p_assignment_target(p): | symbolname | dereference """ + if isinstance(p[1], TargetRegisters): + # if the target registers is just a single register, use that instead + if len(p[1].registers) == 1: + assert isinstance(p[1].registers[0], Register) + p[1] = p[1].registers[0] p[0] = p[1] @@ -1356,7 +1360,8 @@ def _token_sref(p, token_idx): last_cr = p.lexer.lexdata.rfind('\n', 0, p.lexpos(token_idx)) if last_cr < 0: last_cr = -1 - column = (p.lexpos(token_idx) - last_cr) + chunk = p.lexer.lexdata[last_cr:p.lexpos(token_idx)] + column = len(chunk.expandtabs()) return SourceRef(p.lexer.source_filename, p.lineno(token_idx), column) diff --git a/reference.md b/reference.md index 1bdd5efab..ae90d0fee 100644 --- a/reference.md +++ b/reference.md @@ -165,10 +165,11 @@ For most other types this prefix is not supported. **Indirect addressing:** The ``[address]`` syntax means: the contents of the memory at address, or "indirect addressing". By default, if not otherwise known, a single byte is assumed. You can add the ``.byte`` or ``.word`` or ``.float`` -type identifier suffix to make it clear what data type the address points to. -This addressing mode is only supported for constant (integer) addresses and not for variable types, -unless it is part of a subroutine call statement. For an indirect goto call, the 6502 CPU has a special instruction -(``jmp`` indirect) and an indirect subroutine call (``jsr`` indirect) is synthesized using a couple of instructions. +type identifier, inside the bracket, to make it clear what data type the address points to. +For instance: ``[address .word]`` (notice the space, to distinguish this from a dotted symbol name). +For an indirect goto call, the 6502 CPU has a special instruction +(``jmp`` indirect) and an indirect subroutine call (``jsr`` indirect) is emitted +using a couple of instructions. Program Structure diff --git a/tests/test_parser.py b/tests/test_parser.py index 566fd1b98..ac73cb50d 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -180,3 +180,29 @@ def test_parser_2(): assert isinstance(call.target.target, SymbolName) assert call.target.target.name == "zz" assert call.target.address_of is True + + +test_source_3 = """ +~ { + goto.XY = 5 + AX.text = 5 + [$c000.word] = 5 + [AX.word] = 5 +} +""" + +def test_typespec(): + lexer.lineno = 1 + lexer.source_filename = "sourcefile" + filter = TokenFilter(lexer) + result = parser.parse(input=test_source_3, tokenfunc=filter.token) + nodes = result.nodes[0].nodes + assignment1, assignment2, assignment3, assignment4 = nodes + assert assignment1.right.value == 5 + assert assignment2.right.value == 5 + assert assignment3.right.value == 5 + assert assignment4.right.value == 5 + print("A1", assignment1.left) + print("A2", assignment2.left) + print("A3", assignment3.left) + print("A4", assignment4.left) diff --git a/todo.ill b/todo.ill index 02dbaa4d3..4630fe919 100644 --- a/todo.ill +++ b/todo.ill @@ -5,6 +5,7 @@ var zp1_1 = 200 var zp1_2 = 200 + var .float zpf1 var .text zp_s1 = "hello\n" var .ptext zp_s2 = "goodbye\n" var .stext zp_s3 = "welcome\n" @@ -13,10 +14,31 @@ var .array(20) arr1 = $ea var .wordarray(20) arr2 = $ea + memory border = $d020 + const .word cword = 2 start: %breakpoint abc,def + A++ + X-- + A+=1 + X-=2 + border++ + zp1_1++ + zpf1++ + [AX]++ + [AX .byte]++ + [AX .word]++ + [AX .float]++ + [$ccc0]++ + [$ccc0 .byte]++ + [$ccc0 .word]++ + [$ccc0 .float]++ + A+=2 + XY+=666 + + foobar() return 44