diff --git a/il65/astparse.py b/il65/astparse.py index 8fa3e4a74..c3c12c686 100644 --- a/il65/astparse.py +++ b/il65/astparse.py @@ -12,10 +12,10 @@ from .symbols import FLOAT_MAX_POSITIVE, FLOAT_MAX_NEGATIVE, SourceRef, SymbolTa class ParseError(Exception): - def __init__(self, message: str, text: str, sourceref: SourceRef) -> None: + def __init__(self, message: str, sourcetext: str, sourceref: SourceRef) -> None: self.sourceref = sourceref self.msg = message - self.text = text + self.sourcetext = sourcetext def __str__(self): return "{} {:s}".format(self.sourceref, self.msg) diff --git a/il65/codegen.py b/il65/codegen.py index a4052877f..1e4a57ffa 100644 --- a/il65/codegen.py +++ b/il65/codegen.py @@ -49,8 +49,9 @@ class CodeGenerator: for zpblock in [b for b in self.parsed.blocks if b.name == "ZP"]: if zpblock.label_names: raise CodeError("ZP block cannot contain labels") - if zpblock.statements: - raise CodeError("ZP block cannot contain code statements") + # can only contain code comments, or nothing at all + if not all(isinstance(s, ParseResult.Comment) for s in zpblock.statements): + raise CodeError("ZP block cannot contain code statements, only definitions and comments") def optimize(self) -> None: # optimize the generated assembly code @@ -182,11 +183,24 @@ class CodeGenerator: self.p("\t\tjmp {:s}.start\t\t; call user code".format(main_block_label)) def blocks(self) -> None: - # if there's a Zeropage block, it always goes first + # if there's a
<header> block, it always goes second + for block in [b for b in self.parsed.blocks if b.name == "<header>"]: + self.cur_block = block + for s in block.statements: + if isinstance(s, ParseResult.Comment): + self.p(s.text) + else: + raise CodeError("header block cannot contain any other statements beside comments") + self.p("\n") + # if there's a Zeropage block, it always goes second for zpblock in [b for b in self.parsed.blocks if b.name == "ZP"]: - assert not zpblock.statements self.cur_block = zpblock self.p("\n; ---- zero page block: '{:s}' ----\t\t; src l. {:d}\n".format(zpblock.sourceref.file, zpblock.sourceref.line)) + for s in zpblock.statements: + if isinstance(s, ParseResult.Comment): + self.p(s.text) + else: + raise CodeError("zp cannot contain any other statements beside comments") self.p("{:s}\t.proc\n".format(zpblock.label)) self.generate_block_vars(zpblock) self.p("\t.pend\n") @@ -205,8 +219,8 @@ class CodeGenerator: block.statements = statements # generate for block in sorted(self.parsed.blocks, key=lambda b: b.address): - if block.name == "ZP": - continue # zeropage block is already processed + if block.name in ("ZP", "<header>
"): + continue # these blocks are already processed self.cur_block = block self.p("\n; ---- next block: '{:s}' ----\t\t; src l. {:d}\n".format(block.sourceref.file, block.sourceref.line)) if block.address: @@ -387,7 +401,9 @@ class CodeGenerator: with self.preserving_registers(preserve_regs): if stmt.target in REGISTER_WORDS: if stmt.preserve_regs: - # cannot use zp scratch + # cannot use zp scratch. This is very inefficient code! + print("warning: {:s}:{:d}: indirect register pair call, this is very inefficient" + .format(self.cur_block.sourceref.file, stmt.lineno)) self.p("\t\tst{:s} ++".format(stmt.target[0].lower())) self.p("\t\tst{:s} +++".format(stmt.target[1].lower())) self.p("\t\tjsr +") @@ -409,15 +425,16 @@ class CodeGenerator: self.p("+\t\tjmp ({:s})".format(stmt.target)) self.p("+") else: - preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set() + preserve_regs = {'A', 'X', 'Y'} if not stmt.is_goto and stmt.preserve_regs else set() with self.preserving_registers(preserve_regs): self.p("\t\tjsr " + stmt.target) - elif isinstance(stmt, ParseResult.InlineAsm): self.p("\t\t; inline asm, src l. {:d}".format(stmt.lineno)) for line in stmt.asmlines: self.p(line) self.p("\t\t; end inline asm, src l. 
{:d}".format(stmt.lineno)) + elif isinstance(stmt, ParseResult.Comment): + self.p(stmt.text) else: raise CodeError("unknown statement " + repr(stmt)) self.previous_stmt_was_assignment = isinstance(stmt, ParseResult.AssignmentStmt) @@ -640,7 +657,7 @@ class CodeGenerator: else: if rvalue.datatype != DataType.WORD: raise CodeError("can only assign a word to a register pair") - raise NotImplementedError # @todo other mmapped types + raise NotImplementedError("some mmap type assignment") # @todo other mmapped types def generate_assign_mem_to_mem(self, lv: ParseResult.MemMappedValue, rvalue: ParseResult.MemMappedValue) -> None: r_str = rvalue.name if rvalue.name else "${:x}".format(rvalue.address) @@ -652,7 +669,6 @@ class CodeGenerator: self.p("\t\tsta " + (lv.name or Parser.to_hex(lv.address))) elif lv.datatype == DataType.WORD: if rvalue.datatype == DataType.BYTE: - raise NotImplementedError # XXX with self.preserving_registers({'A'}): l_str = lv.name or Parser.to_hex(lv.address) self.p("\t\tlda #0") @@ -667,10 +683,10 @@ class CodeGenerator: self.p("\t\tlda {:s}+1".format(r_str)) self.p("\t\tsta {:s}+1".format(l_str)) else: - # @todo other mmapped types raise CodeError("can only assign a byte or word to a word") else: - raise CodeError("can only assign to a memory mapped byte or word value for now") # @todo + raise CodeError("can only assign to a memory mapped byte or word value for now " + "(if you need other types, can't you use a var?)") def generate_assign_char_to_memory(self, lv: ParseResult.MemMappedValue, char_str: str) -> None: # Memory = Character diff --git a/il65/main.py b/il65/main.py index 02af875d8..387366453 100644 --- a/il65/main.py +++ b/il65/main.py @@ -19,6 +19,7 @@ def main() -> None: description = "Compiler for IL65 language, code name 'Sick'" ap = argparse.ArgumentParser(description=description) ap.add_argument("-o", "--output", help="output directory") + ap.add_argument("--noopt", action="store_true", help="do not optimize the parse tree") 
ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile") args = ap.parse_args() assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm" @@ -37,8 +38,11 @@ def main() -> None: p = Parser(args.sourcefile, args.output, sourcelines, ppsymbols=symbols) parsed = p.parse() if parsed: - opt = Optimizer(parsed) - parsed = opt.optimize() + if args.noopt: + print("not optimizing the parse tree!") + else: + opt = Optimizer(parsed) + parsed = opt.optimize() cg = CodeGenerator(parsed) cg.generate() cg.optimize() diff --git a/il65/parse.py b/il65/parse.py index 9b1df1fd0..b0671b509 100644 --- a/il65/parse.py +++ b/il65/parse.py @@ -39,7 +39,7 @@ class ParseResult: _unnamed_block_labels = {} # type: Dict[ParseResult.Block, str] def __init__(self, name: str, sourceref: SourceRef, parent_scope: SymbolTable) -> None: - self.sourceref = sourceref + self.sourceref = sourceref.copy() self.address = 0 self.name = name self.statements = [] # type: List[ParseResult._AstNode] @@ -80,10 +80,11 @@ class ParseResult: self.statements = statements class Value: - def __init__(self, datatype: DataType, name: str=None, constant: bool=False) -> None: + def __init__(self, datatype: DataType, name: str=None, constant: bool=False, indirect: bool=False) -> None: self.datatype = datatype self.name = name self.constant = constant + self.indirect = indirect # is the value accessed as [value], "take the contents of the memory address it points to" @todo def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]: if self.constant: @@ -278,6 +279,10 @@ class ParseResult: class _AstNode: # @todo merge Value with this? 
pass + class Comment(_AstNode): + def __init__(self, text: str) -> None: + self.text = text + class Label(_AstNode): def __init__(self, name: str, lineno: int) -> None: self.name = name @@ -365,7 +370,14 @@ class ParseResult: self.blocks.append(block) def merge(self, parsed: 'ParseResult') -> None: - self.blocks.extend(parsed.blocks) + existing_blocknames = set(block.name for block in self.blocks) + other_blocknames = set(block.name for block in parsed.blocks) + overlap = existing_blocknames & other_blocknames + if overlap != {"<header>"}: + raise SymbolError("double block names: {}".format(overlap)) + for block in parsed.blocks: + if block.name != "<header>": + self.blocks.append(block) class Parser: @@ -388,13 +400,17 @@ class Parser: def load_source(self, filename: str) -> List[Tuple[int, str]]: with open(filename, "rU") as source: sourcelines = source.readlines() - # store all lines that are not empty or a comment, and strip any other comments + # store all lines that aren't empty + # comments are kept (end-of-line comments are put on a separate line) lines = [] for num, line in enumerate(sourcelines, start=1): line2 = line.strip() - if not line2 or line2.startswith(";"): - continue - lines.append((num, line.partition(";")[0].rstrip())) + if line2: + line, sep, comment = line.partition(";") + if comment: + lines.append((num, "; " + comment.strip())) + if line.rstrip(): + lines.append((num, line.rstrip())) return lines def parse(self) -> Optional[ParseResult]: @@ -402,8 +418,9 @@ class Parser: try: return self.parse_file() except ParseError as x: - if x.text: - print("\tsource text: '{:s}'".format(x.text)) + print() + if x.sourcetext: + print("\tsource text: '{:s}'".format(x.sourcetext)) if x.sourceref.column: print("\t" + ' '*x.sourceref.column + ' ^') if self.parsing_import: @@ -412,7 +429,7 @@ class Parser: print("Error:", str(x)) raise # XXX temporary solution to get stack trace info in the event of parse errors except Exception as x: - print("ERROR: internal parser error: ", x) + print("\nERROR: internal parser error: ", x) print(" file:", self.sourceref.file, "block:", self.cur_block.name, "line:", self.sourceref.line) raise # XXX temporary solution to get stack trace info in the event of parse errors @@ -425,16 +442,28 @@ class Parser: def print_warning(self, text: str) -> None: print(text) + def _parse_comments(self) -> None: + while True: + line = self.next_line().lstrip() + if line.startswith(';'): + self.cur_block.statements.append(ParseResult.Comment(line)) + continue + self.prev_line() + break + def _parse_1(self) -> None: + self.cur_block = ParseResult.Block("<header>
", self.sourceref, self.root_scope) + self.result.add_block(self.cur_block) self.parse_header() zeropage.configure(self.result.clobberzp) while True: - next_line = self.peek_next_line() - if next_line.lstrip().startswith("~"): + self._parse_comments() + next_line = self.peek_next_line().lstrip() + if next_line.startswith("~"): block = self.parse_block() if block: self.result.add_block(block) - elif next_line.lstrip().startswith("import"): + elif next_line.startswith("import"): self.parse_import() else: break @@ -544,6 +573,7 @@ class Parser: self.result.format = ProgramFormat.RAW output_specified = False while True: + self._parse_comments() line = self.next_line() if line.startswith("output"): if output_specified: @@ -627,8 +657,11 @@ class Parser: print("\ncontinuing", self.sourceref.file) if result: # merge the symbol table of the imported file into our own - self.root_scope.merge_roots(parser.root_scope) - self.result.merge(result) + try: + self.root_scope.merge_roots(parser.root_scope) + self.result.merge(result) + except SymbolError as x: + raise self.PError(str(x)) return else: raise self.PError("Error while parsing imported file") @@ -639,13 +672,15 @@ class Parser: def parse_block(self) -> ParseResult.Block: # first line contains block header "~ [name] [addr]" followed by a '{' + self._parse_comments() line = self.next_line() + block_def_lineno = self.sourceref.line line = line.lstrip() if not line.startswith("~"): raise self.PError("expected '~' (block)") block_args = line[1:].split() arg = "" - self.cur_block = ParseResult.Block("", self.sourceref, self.root_scope) + self.cur_block = ParseResult.Block("", self.sourceref.copy(), self.root_scope) is_zp_block = False while block_args: arg = block_args.pop(0) @@ -659,9 +694,9 @@ class Parser: raise self.PError("duplicate block name '{:s}', original definition at {}".format(arg, orig.sourceref)) self.cur_block = orig # zero page block occurrences are merged else: - self.cur_block = ParseResult.Block(arg, 
self.sourceref, self.root_scope) + self.cur_block = ParseResult.Block(arg, self.sourceref.copy(), self.root_scope) try: - self.root_scope.define_scope(self.cur_block.symbols, self.sourceref) + self.root_scope.define_scope(self.cur_block.symbols, self.cur_block.sourceref) except SymbolError as x: raise self.PError(str(x)) elif arg == "{": @@ -695,6 +730,7 @@ class Parser: else: print(" parsing block '{:s}'".format(self.cur_block.name)) while True: + self._parse_comments() line = self.next_line() unstripped_line = line line = line.strip() @@ -909,7 +945,7 @@ class Parser: if isinstance(target, VariableDef): # checks if target.address is not None: - raise self.PError("invalid call target (should be label or address)") + raise self.PError("can only call a constant expression (label, address, const)") if target.type != DataType.WORD: raise self.PError("invalid call target (should be 16-bit address)") else: @@ -1014,6 +1050,7 @@ class Parser: def parse_asm(self) -> ParseResult.InlineAsm: line = self.next_line() + lineno = self.sourceref.line aline = line.split() if not len(aline) == 2 or aline[0] != "asm" or aline[1] != "{": raise self.PError("invalid asm start") @@ -1021,7 +1058,7 @@ class Parser: while True: line = self.next_line() if line.strip() == "}": - return ParseResult.InlineAsm(self.sourceref.line, asmlines) + return ParseResult.InlineAsm(lineno, asmlines) asmlines.append(line) def parse_asminclude(self, line: str) -> ParseResult.InlineAsm: @@ -1131,7 +1168,9 @@ class Parser: elif text.startswith('[') and text.endswith(']'): num_or_name = text[1:-1].strip() word_type = float_type = False - if num_or_name.endswith(".word"): + if num_or_name.endswith(".byte"): + num_or_name = num_or_name[:-5] + elif num_or_name.endswith(".word"): word_type = True num_or_name = num_or_name[:-5] elif num_or_name.endswith(".float"): @@ -1150,12 +1189,12 @@ class Parser: raise TypeError("integer required") elif isinstance(sym, VariableDef): if sym.type == DataType.BYTE and (word_type 
or float_type): - raise self.PError("byte value required") + raise self.PError("invalid type modifier, byte expected") elif sym.type == DataType.WORD and float_type: - raise self.PError("word value required") + raise self.PError("invalid type modifier, word expected") return ParseResult.MemMappedValue(sym.address, sym.type, sym.length, sym.name) else: - raise self.PError("invalid symbol type used as lvalue of assignment (3)") + raise self.PError("invalid symbol type used as lvalue of assignment") else: addr = parse_expr_as_int(num_or_name, self.cur_block.symbols, self.ppsymbols, self.sourceref) if word_type: diff --git a/il65/symbols.py b/il65/symbols.py index 393d9dc43..d504c5bc9 100644 --- a/il65/symbols.py +++ b/il65/symbols.py @@ -95,7 +95,7 @@ class SymbolDefinition: def __init__(self, blockname: str, name: str, sourceref: SourceRef, allocate: bool) -> None: self.blockname = blockname self.name = name - self.sourceref = sourceref + self.sourceref = sourceref.copy() self.allocate = allocate # set to false if the variable is memory mapped (or a constant) instead of allocated global _identifier_seq_nr self.seq_nr = _identifier_seq_nr @@ -314,6 +314,8 @@ class SymbolTable: if identifier: if isinstance(identifier, SymbolDefinition): raise SymbolError("identifier was already defined at " + str(identifier.sourceref)) + elif isinstance(identifier, SymbolTable): + raise SymbolError("identifier already defined as block at " + str(identifier.owning_block.sourceref)) raise SymbolError("identifier already defined as " + str(type(identifier))) if name in MATH_SYMBOLS: print("warning: {}: identifier shadows a name from the math module".format(sourceref)) @@ -402,7 +404,11 @@ class SymbolTable: def merge_roots(self, other_root: 'SymbolTable') -> None: for name, thing in other_root.symbols.items(): if isinstance(thing, SymbolTable): - self.define_scope(thing, thing.owning_block.sourceref) + try: + self.define_scope(thing, thing.owning_block.sourceref) + except SymbolError as 
x: + raise SymbolError("problematic symbol '{:s}' from {}; {:s}" + .format(thing.name, thing.owning_block.sourceref, str(x))) from None def print_table(self, summary_only: bool=False) -> None: if summary_only: diff --git a/reference.txt b/reference.txt index 462d0cc15..930c15d05 100644 --- a/reference.txt +++ b/reference.txt @@ -158,9 +158,7 @@ The syntax "[address]" means: the contents of the memory at address. By default, if not otherwise known, a single byte is assumed. You can add the ".byte" or ".word" or ".float" suffix to make it clear what data type the address points to. -Everything after a semicolon ';' is a comment and is ignored. -# @todo Everything after a double semicolon ';;' is a comment and is ignored, but is copied into the resulting assembly source code. - +Everything after a semicolon ';' is a comment and is ignored, however the comment is copied into the resulting assembly source code. FLOW CONTROL diff --git a/testsource/calls.ill b/testsource/calls.ill index 3300d0447..e4bdb6ebc 100644 --- a/testsource/calls.ill +++ b/testsource/calls.ill @@ -17,10 +17,9 @@ bar - go [AX] ; @todo check indrection jmp (AX) - go [var1] ; @todo check indirection jmp (var1) - go [#mem1] ; @todo check indirection jmp ($cff0) - ; go mem1 ; @todo support this, should jmp $cff0 + go [AX] + go [var1] + go [#mem1] go [$c2] go [$c2dd] go $c000 @@ -36,7 +35,6 @@ bar fcall [XY] fcall [var1] fcall [#mem1] - ;fcall mem1 ; @todo support this, should jsr $cff0 fcall [$c2] fcall [$c2dd] fcall $c000 @@ -52,13 +50,19 @@ bar call [AX] call [var1] call [#mem1] - ;call mem1 ; @todo support this, should jsr $cff0 call [$c2] call [$c2dd] call $c000 call $c2 + asm { + nop + nop + nop + nop + } + call constw call sub1 call main.start diff --git a/testsource/dtypes.ill b/testsource/dtypes.ill index dbc208373..c798b7772 100644 --- a/testsource/dtypes.ill +++ b/testsource/dtypes.ill @@ -1,4 +1,4 @@ -; var defintions and immediate primitive data type tests +; var definitions and immediate 
primitive data type tests output raw clobberzp @@ -145,11 +145,17 @@ start A = 255 A = X A = [$c020] - A = [membyte2] - ;A = [membyte2.byte] ; @todo ok - ;A = [membyte2.word] ; @todo type error - ;A = [membyte2.float] ; @todo type error - ; A = #expr_byte1b ; @todo cannot assign address to byte + A = [#membyte2] + A = membyte2 + A = [membyte2] ; @todo error, invalid rvalue, use membyte without indirect? + A = [membyte2.byte] ; @todo error, " + A = expr_byte1b ; @todo ok + ;A = #expr_byte1b ; @todo cannot assign address to byte, correct error + ;A = cbyte3 ; @todo fix assignment to lda #cybte3 + ;A = [cbyte3] ; @todo error invalid rvalue + A = initbytea0 + A = [initbytea0] ; @todo error, invalid rvalue, use initbytea0 without indirect? + XY = 0 XY = '@' @@ -166,6 +172,14 @@ start AX = "" AX = XY AX = Y + ;XY = [membyte2] ; @todo ok pad + ;XY = [membyte2.byte] ; @todo ok pad + ;XY = membyte2 ; @todo ok pad + ;XY = #membyte2 ; @todo ok + ;XY = [memword1] ; @todo ok + ;XY = [memword1.byte] ; @todo ok pad + ;XY = [memword1.word] ; @todo ok + [$c000] = 255 [$c000] = '@' @@ -179,16 +193,11 @@ start [$c000.float] = 65535 [$c000.float] = 456.66 [$c000.float] = 1.70141183e+38 - ;[$c000.byte] = AX ; @todo out of range [$c000.word] = AX [$c001] = [$c002] - ;[$c001.word] = [$c002] ;@todo okay (pad) - ;[$c001.word] = [$c002.byte] ;@todo okay (pad) - [$c001.word] = [$c002.word] - ;[$c001.word] = [$c002.float] ;@todo parse error - ;[$c001.float] = [$c002.byte] ;@todo ok - ;[$c001.float] = [$c002.word] ;@todo support this - ;[$c001.float] = [$c002.float] ;@todo support this + [$c111.word] = [$c222] + [$c112.word] = [$c223.byte] + [$c222.word] = [$c333.word] SC = 0 @@ -220,6 +229,10 @@ start membyte1 = 22 memword1 = 2233 memfloat = 3.4567 + ;[membyte1] = 33 ; @todo error, invalid lvalue, use without [] + [memword1] = 4444 + ;[memword1] = [AX] ; @todo error, only address allowed in [] + [memfloat] = 5.5566 membyte1 = A memword1 = A @@ -239,4 +252,3 @@ start [$c100.word] = 
"text-immediate" ; reuse [$c200.word] = "" ; reuse } - diff --git a/testsource/source1.ill b/testsource/source1.ill index 13170282c..16b3c6911 100644 --- a/testsource/source1.ill +++ b/testsource/source1.ill @@ -1,5 +1,8 @@ ; source IL file ; these are comments +; line 2 comment +; line 3 comment + output prg,sys ; create a c-64 program with basic SYS call to launch it ;clobberzp restore ; clobber over the zp memory normally used by basic/kernel rom, frees up more zp @@ -8,6 +11,8 @@ output prg,sys ; create a c-64 program with basic SYS call to launch it ~main $0a00 { + ; this is the main block with the start routine. + memory screen = $d021 memory border = $d020 memory cursor = 646 @@ -126,6 +131,8 @@ somelabel1 return } +; comments before block 4 + ~ block4 { A=1 A=2 @@ -158,3 +165,7 @@ subroutine return ,,$99 } + + +; comment at end +; another one