various fixes

This commit is contained in:
Irmen de Jong 2017-12-21 23:05:35 +01:00
parent 2110e7afef
commit 504ee1a880
9 changed files with 157 additions and 67 deletions

View File

@ -12,10 +12,10 @@ from .symbols import FLOAT_MAX_POSITIVE, FLOAT_MAX_NEGATIVE, SourceRef, SymbolTa
class ParseError(Exception):
def __init__(self, message: str, text: str, sourceref: SourceRef) -> None:
def __init__(self, message: str, sourcetext: str, sourceref: SourceRef) -> None:
self.sourceref = sourceref
self.msg = message
self.text = text
self.sourcetext = sourcetext
def __str__(self):
return "{} {:s}".format(self.sourceref, self.msg)

View File

@ -49,8 +49,9 @@ class CodeGenerator:
for zpblock in [b for b in self.parsed.blocks if b.name == "ZP"]:
if zpblock.label_names:
raise CodeError("ZP block cannot contain labels")
if zpblock.statements:
raise CodeError("ZP block cannot contain code statements")
# can only contain code comments, or nothing at all
if not all(isinstance(s, ParseResult.Comment) for s in zpblock.statements):
raise CodeError("ZP block cannot contain code statements, only definitions and comments")
def optimize(self) -> None:
# optimize the generated assembly code
@ -182,11 +183,24 @@ class CodeGenerator:
self.p("\t\tjmp {:s}.start\t\t; call user code".format(main_block_label))
def blocks(self) -> None:
# if there's a Zeropage block, it always goes first
# if there's a <header> block, it always goes second
for block in [b for b in self.parsed.blocks if b.name == "<header>"]:
self.cur_block = block
for s in block.statements:
if isinstance(s, ParseResult.Comment):
self.p(s.text)
else:
raise CodeError("header block cannot contain any other statements beside comments")
self.p("\n")
# if there's a Zeropage block, it always goes second
for zpblock in [b for b in self.parsed.blocks if b.name == "ZP"]:
assert not zpblock.statements
self.cur_block = zpblock
self.p("\n; ---- zero page block: '{:s}' ----\t\t; src l. {:d}\n".format(zpblock.sourceref.file, zpblock.sourceref.line))
for s in zpblock.statements:
if isinstance(s, ParseResult.Comment):
self.p(s.text)
else:
raise CodeError("zp cannot contain any other statements beside comments")
self.p("{:s}\t.proc\n".format(zpblock.label))
self.generate_block_vars(zpblock)
self.p("\t.pend\n")
@ -205,8 +219,8 @@ class CodeGenerator:
block.statements = statements
# generate
for block in sorted(self.parsed.blocks, key=lambda b: b.address):
if block.name == "ZP":
continue # zeropage block is already processed
if block.name in ("ZP", "<header>"):
continue # these blocks are already processed
self.cur_block = block
self.p("\n; ---- next block: '{:s}' ----\t\t; src l. {:d}\n".format(block.sourceref.file, block.sourceref.line))
if block.address:
@ -387,7 +401,9 @@ class CodeGenerator:
with self.preserving_registers(preserve_regs):
if stmt.target in REGISTER_WORDS:
if stmt.preserve_regs:
# cannot use zp scratch
# cannot use zp scratch. This is very inefficient code!
print("warning: {:s}:{:d}: indirect register pair call, this is very inefficient"
.format(self.cur_block.sourceref.file, stmt.lineno))
self.p("\t\tst{:s} ++".format(stmt.target[0].lower()))
self.p("\t\tst{:s} +++".format(stmt.target[1].lower()))
self.p("\t\tjsr +")
@ -409,15 +425,16 @@ class CodeGenerator:
self.p("+\t\tjmp ({:s})".format(stmt.target))
self.p("+")
else:
preserve_regs = {'A', 'X', 'Y'} if stmt.preserve_regs else set()
preserve_regs = {'A', 'X', 'Y'} if not stmt.is_goto and stmt.preserve_regs else set()
with self.preserving_registers(preserve_regs):
self.p("\t\tjsr " + stmt.target)
elif isinstance(stmt, ParseResult.InlineAsm):
self.p("\t\t; inline asm, src l. {:d}".format(stmt.lineno))
for line in stmt.asmlines:
self.p(line)
self.p("\t\t; end inline asm, src l. {:d}".format(stmt.lineno))
elif isinstance(stmt, ParseResult.Comment):
self.p(stmt.text)
else:
raise CodeError("unknown statement " + repr(stmt))
self.previous_stmt_was_assignment = isinstance(stmt, ParseResult.AssignmentStmt)
@ -640,7 +657,7 @@ class CodeGenerator:
else:
if rvalue.datatype != DataType.WORD:
raise CodeError("can only assign a word to a register pair")
raise NotImplementedError # @todo other mmapped types
raise NotImplementedError("some mmap type assignment") # @todo other mmapped types
def generate_assign_mem_to_mem(self, lv: ParseResult.MemMappedValue, rvalue: ParseResult.MemMappedValue) -> None:
r_str = rvalue.name if rvalue.name else "${:x}".format(rvalue.address)
@ -652,7 +669,6 @@ class CodeGenerator:
self.p("\t\tsta " + (lv.name or Parser.to_hex(lv.address)))
elif lv.datatype == DataType.WORD:
if rvalue.datatype == DataType.BYTE:
raise NotImplementedError # XXX
with self.preserving_registers({'A'}):
l_str = lv.name or Parser.to_hex(lv.address)
self.p("\t\tlda #0")
@ -667,10 +683,10 @@ class CodeGenerator:
self.p("\t\tlda {:s}+1".format(r_str))
self.p("\t\tsta {:s}+1".format(l_str))
else:
# @todo other mmapped types
raise CodeError("can only assign a byte or word to a word")
else:
raise CodeError("can only assign to a memory mapped byte or word value for now") # @todo
raise CodeError("can only assign to a memory mapped byte or word value for now "
"(if you need other types, can't you use a var?)")
def generate_assign_char_to_memory(self, lv: ParseResult.MemMappedValue, char_str: str) -> None:
# Memory = Character

View File

@ -19,6 +19,7 @@ def main() -> None:
description = "Compiler for IL65 language, code name 'Sick'"
ap = argparse.ArgumentParser(description=description)
ap.add_argument("-o", "--output", help="output directory")
ap.add_argument("--noopt", action="store_true", help="do not optimize the parse tree")
ap.add_argument("sourcefile", help="the source .ill/.il65 file to compile")
args = ap.parse_args()
assembly_filename = os.path.splitext(args.sourcefile)[0] + ".asm"
@ -37,8 +38,11 @@ def main() -> None:
p = Parser(args.sourcefile, args.output, sourcelines, ppsymbols=symbols)
parsed = p.parse()
if parsed:
opt = Optimizer(parsed)
parsed = opt.optimize()
if args.noopt:
print("not optimizing the parse tree!")
else:
opt = Optimizer(parsed)
parsed = opt.optimize()
cg = CodeGenerator(parsed)
cg.generate()
cg.optimize()

View File

@ -39,7 +39,7 @@ class ParseResult:
_unnamed_block_labels = {} # type: Dict[ParseResult.Block, str]
def __init__(self, name: str, sourceref: SourceRef, parent_scope: SymbolTable) -> None:
self.sourceref = sourceref
self.sourceref = sourceref.copy()
self.address = 0
self.name = name
self.statements = [] # type: List[ParseResult._AstNode]
@ -80,10 +80,11 @@ class ParseResult:
self.statements = statements
class Value:
def __init__(self, datatype: DataType, name: str=None, constant: bool=False) -> None:
def __init__(self, datatype: DataType, name: str=None, constant: bool=False, indirect: bool=False) -> None:
self.datatype = datatype
self.name = name
self.constant = constant
self.indirect = indirect # is the value accessed as [value], "take the contents of the memory address it points to" @todo
def assignable_from(self, other: 'ParseResult.Value') -> Tuple[bool, str]:
if self.constant:
@ -278,6 +279,10 @@ class ParseResult:
class _AstNode: # @todo merge Value with this?
pass
class Comment(_AstNode):
def __init__(self, text: str) -> None:
self.text = text
class Label(_AstNode):
def __init__(self, name: str, lineno: int) -> None:
self.name = name
@ -365,7 +370,14 @@ class ParseResult:
self.blocks.append(block)
def merge(self, parsed: 'ParseResult') -> None:
self.blocks.extend(parsed.blocks)
existing_blocknames = set(block.name for block in self.blocks)
other_blocknames = set(block.name for block in parsed.blocks)
overlap = existing_blocknames & other_blocknames
if overlap != {"<header>"}:
raise SymbolError("double block names: {}".format(overlap))
for block in parsed.blocks:
if block.name != "<header>":
self.blocks.append(block)
class Parser:
@ -388,13 +400,17 @@ class Parser:
def load_source(self, filename: str) -> List[Tuple[int, str]]:
with open(filename, "rU") as source:
sourcelines = source.readlines()
# store all lines that are not empty or a comment, and strip any other comments
# store all lines that aren't empty
# comments are kept (end-of-line comments are put on a separate line)
lines = []
for num, line in enumerate(sourcelines, start=1):
line2 = line.strip()
if not line2 or line2.startswith(";"):
continue
lines.append((num, line.partition(";")[0].rstrip()))
if line2:
line, sep, comment = line.partition(";")
if comment:
lines.append((num, "; " + comment.strip()))
if line.rstrip():
lines.append((num, line.rstrip()))
return lines
def parse(self) -> Optional[ParseResult]:
@ -402,8 +418,9 @@ class Parser:
try:
return self.parse_file()
except ParseError as x:
if x.text:
print("\tsource text: '{:s}'".format(x.text))
print()
if x.sourcetext:
print("\tsource text: '{:s}'".format(x.sourcetext))
if x.sourceref.column:
print("\t" + ' '*x.sourceref.column + ' ^')
if self.parsing_import:
@ -412,7 +429,7 @@ class Parser:
print("Error:", str(x))
raise # XXX temporary solution to get stack trace info in the event of parse errors
except Exception as x:
print("ERROR: internal parser error: ", x)
print("\nERROR: internal parser error: ", x)
print(" file:", self.sourceref.file, "block:", self.cur_block.name, "line:", self.sourceref.line)
raise # XXX temporary solution to get stack trace info in the event of parse errors
@ -425,16 +442,28 @@ class Parser:
def print_warning(self, text: str) -> None:
print(text)
def _parse_comments(self) -> None:
while True:
line = self.next_line().lstrip()
if line.startswith(';'):
self.cur_block.statements.append(ParseResult.Comment(line))
continue
self.prev_line()
break
def _parse_1(self) -> None:
self.cur_block = ParseResult.Block("<header>", self.sourceref, self.root_scope)
self.result.add_block(self.cur_block)
self.parse_header()
zeropage.configure(self.result.clobberzp)
while True:
next_line = self.peek_next_line()
if next_line.lstrip().startswith("~"):
self._parse_comments()
next_line = self.peek_next_line().lstrip()
if next_line.startswith("~"):
block = self.parse_block()
if block:
self.result.add_block(block)
elif next_line.lstrip().startswith("import"):
elif next_line.startswith("import"):
self.parse_import()
else:
break
@ -544,6 +573,7 @@ class Parser:
self.result.format = ProgramFormat.RAW
output_specified = False
while True:
self._parse_comments()
line = self.next_line()
if line.startswith("output"):
if output_specified:
@ -627,8 +657,11 @@ class Parser:
print("\ncontinuing", self.sourceref.file)
if result:
# merge the symbol table of the imported file into our own
self.root_scope.merge_roots(parser.root_scope)
self.result.merge(result)
try:
self.root_scope.merge_roots(parser.root_scope)
self.result.merge(result)
except SymbolError as x:
raise self.PError(str(x))
return
else:
raise self.PError("Error while parsing imported file")
@ -639,13 +672,15 @@ class Parser:
def parse_block(self) -> ParseResult.Block:
# first line contains block header "~ [name] [addr]" followed by a '{'
self._parse_comments()
line = self.next_line()
block_def_lineno = self.sourceref.line
line = line.lstrip()
if not line.startswith("~"):
raise self.PError("expected '~' (block)")
block_args = line[1:].split()
arg = ""
self.cur_block = ParseResult.Block("", self.sourceref, self.root_scope)
self.cur_block = ParseResult.Block("", self.sourceref.copy(), self.root_scope)
is_zp_block = False
while block_args:
arg = block_args.pop(0)
@ -659,9 +694,9 @@ class Parser:
raise self.PError("duplicate block name '{:s}', original definition at {}".format(arg, orig.sourceref))
self.cur_block = orig # zero page block occurrences are merged
else:
self.cur_block = ParseResult.Block(arg, self.sourceref, self.root_scope)
self.cur_block = ParseResult.Block(arg, self.sourceref.copy(), self.root_scope)
try:
self.root_scope.define_scope(self.cur_block.symbols, self.sourceref)
self.root_scope.define_scope(self.cur_block.symbols, self.cur_block.sourceref)
except SymbolError as x:
raise self.PError(str(x))
elif arg == "{":
@ -695,6 +730,7 @@ class Parser:
else:
print(" parsing block '{:s}'".format(self.cur_block.name))
while True:
self._parse_comments()
line = self.next_line()
unstripped_line = line
line = line.strip()
@ -909,7 +945,7 @@ class Parser:
if isinstance(target, VariableDef):
# checks
if target.address is not None:
raise self.PError("invalid call target (should be label or address)")
raise self.PError("can only call a constant expression (label, address, const)")
if target.type != DataType.WORD:
raise self.PError("invalid call target (should be 16-bit address)")
else:
@ -1014,6 +1050,7 @@ class Parser:
def parse_asm(self) -> ParseResult.InlineAsm:
line = self.next_line()
lineno = self.sourceref.line
aline = line.split()
if not len(aline) == 2 or aline[0] != "asm" or aline[1] != "{":
raise self.PError("invalid asm start")
@ -1021,7 +1058,7 @@ class Parser:
while True:
line = self.next_line()
if line.strip() == "}":
return ParseResult.InlineAsm(self.sourceref.line, asmlines)
return ParseResult.InlineAsm(lineno, asmlines)
asmlines.append(line)
def parse_asminclude(self, line: str) -> ParseResult.InlineAsm:
@ -1131,7 +1168,9 @@ class Parser:
elif text.startswith('[') and text.endswith(']'):
num_or_name = text[1:-1].strip()
word_type = float_type = False
if num_or_name.endswith(".word"):
if num_or_name.endswith(".byte"):
num_or_name = num_or_name[:-5]
elif num_or_name.endswith(".word"):
word_type = True
num_or_name = num_or_name[:-5]
elif num_or_name.endswith(".float"):
@ -1150,12 +1189,12 @@ class Parser:
raise TypeError("integer required")
elif isinstance(sym, VariableDef):
if sym.type == DataType.BYTE and (word_type or float_type):
raise self.PError("byte value required")
raise self.PError("invalid type modifier, byte expected")
elif sym.type == DataType.WORD and float_type:
raise self.PError("word value required")
raise self.PError("invalid type modifier, word expected")
return ParseResult.MemMappedValue(sym.address, sym.type, sym.length, sym.name)
else:
raise self.PError("invalid symbol type used as lvalue of assignment (3)")
raise self.PError("invalid symbol type used as lvalue of assignment")
else:
addr = parse_expr_as_int(num_or_name, self.cur_block.symbols, self.ppsymbols, self.sourceref)
if word_type:

View File

@ -95,7 +95,7 @@ class SymbolDefinition:
def __init__(self, blockname: str, name: str, sourceref: SourceRef, allocate: bool) -> None:
self.blockname = blockname
self.name = name
self.sourceref = sourceref
self.sourceref = sourceref.copy()
self.allocate = allocate # set to false if the variable is memory mapped (or a constant) instead of allocated
global _identifier_seq_nr
self.seq_nr = _identifier_seq_nr
@ -314,6 +314,8 @@ class SymbolTable:
if identifier:
if isinstance(identifier, SymbolDefinition):
raise SymbolError("identifier was already defined at " + str(identifier.sourceref))
elif isinstance(identifier, SymbolTable):
raise SymbolError("identifier already defined as block at " + str(identifier.owning_block.sourceref))
raise SymbolError("identifier already defined as " + str(type(identifier)))
if name in MATH_SYMBOLS:
print("warning: {}: identifier shadows a name from the math module".format(sourceref))
@ -402,7 +404,11 @@ class SymbolTable:
def merge_roots(self, other_root: 'SymbolTable') -> None:
for name, thing in other_root.symbols.items():
if isinstance(thing, SymbolTable):
self.define_scope(thing, thing.owning_block.sourceref)
try:
self.define_scope(thing, thing.owning_block.sourceref)
except SymbolError as x:
raise SymbolError("problematic symbol '{:s}' from {}; {:s}"
.format(thing.name, thing.owning_block.sourceref, str(x))) from None
def print_table(self, summary_only: bool=False) -> None:
if summary_only:

View File

@ -158,9 +158,7 @@ The syntax "[address]" means: the contents of the memory at address.
By default, if not otherwise known, a single byte is assumed. You can add the ".byte" or ".word" or ".float" suffix
to make it clear what data type the address points to.
Everything after a semicolon ';' is a comment and is ignored.
# @todo Everything after a double semicolon ';;' is a comment and is ignored, but is copied into the resulting assembly source code.
Everything after a semicolon ';' is a comment and is ignored by the compiler; however, the comment is copied into the resulting assembly source code.
FLOW CONTROL

View File

@ -17,10 +17,9 @@
bar
go [AX] ; @todo check indrection jmp (AX)
go [var1] ; @todo check indirection jmp (var1)
go [#mem1] ; @todo check indirection jmp ($cff0)
; go mem1 ; @todo support this, should jmp $cff0
go [AX]
go [var1]
go [#mem1]
go [$c2]
go [$c2dd]
go $c000
@ -36,7 +35,6 @@ bar
fcall [XY]
fcall [var1]
fcall [#mem1]
;fcall mem1 ; @todo support this, should jsr $cff0
fcall [$c2]
fcall [$c2dd]
fcall $c000
@ -52,13 +50,19 @@ bar
call [AX]
call [var1]
call [#mem1]
;call mem1 ; @todo support this, should jsr $cff0
call [$c2]
call [$c2dd]
call $c000
call $c2
asm {
nop
nop
nop
nop
}
call constw
call sub1
call main.start

View File

@ -1,4 +1,4 @@
; var defintions and immediate primitive data type tests
; var definitions and immediate primitive data type tests
output raw
clobberzp
@ -145,11 +145,17 @@ start
A = 255
A = X
A = [$c020]
A = [membyte2]
;A = [membyte2.byte] ; @todo ok
;A = [membyte2.word] ; @todo type error
;A = [membyte2.float] ; @todo type error
; A = #expr_byte1b ; @todo cannot assign address to byte
A = [#membyte2]
A = membyte2
A = [membyte2] ; @todo error, invalid rvalue, use membyte without indirect?
A = [membyte2.byte] ; @todo error, "
A = expr_byte1b ; @todo ok
;A = #expr_byte1b ; @todo cannot assign address to byte, correct error
;A = cbyte3 ; @todo fix assignment to lda #cbyte3
;A = [cbyte3] ; @todo error invalid rvalue
A = initbytea0
A = [initbytea0] ; @todo error, invalid rvalue, use initbytea0 without indirect?
XY = 0
XY = '@'
@ -166,6 +172,14 @@ start
AX = ""
AX = XY
AX = Y
;XY = [membyte2] ; @todo ok pad
;XY = [membyte2.byte] ; @todo ok pad
;XY = membyte2 ; @todo ok pad
;XY = #membyte2 ; @todo ok
;XY = [memword1] ; @todo ok
;XY = [memword1.byte] ; @todo ok pad
;XY = [memword1.word] ; @todo ok
[$c000] = 255
[$c000] = '@'
@ -179,16 +193,11 @@ start
[$c000.float] = 65535
[$c000.float] = 456.66
[$c000.float] = 1.70141183e+38
;[$c000.byte] = AX ; @todo out of range
[$c000.word] = AX
[$c001] = [$c002]
;[$c001.word] = [$c002] ;@todo okay (pad)
;[$c001.word] = [$c002.byte] ;@todo okay (pad)
[$c001.word] = [$c002.word]
;[$c001.word] = [$c002.float] ;@todo parse error
;[$c001.float] = [$c002.byte] ;@todo ok
;[$c001.float] = [$c002.word] ;@todo support this
;[$c001.float] = [$c002.float] ;@todo support this
[$c111.word] = [$c222]
[$c112.word] = [$c223.byte]
[$c222.word] = [$c333.word]
SC = 0
@ -220,6 +229,10 @@ start
membyte1 = 22
memword1 = 2233
memfloat = 3.4567
;[membyte1] = 33 ; @todo error, invalid lvalue, use without []
[memword1] = 4444
;[memword1] = [AX] ; @todo error, only address allowed in []
[memfloat] = 5.5566
membyte1 = A
memword1 = A
@ -239,4 +252,3 @@ start
[$c100.word] = "text-immediate" ; reuse
[$c200.word] = "" ; reuse
}

View File

@ -1,5 +1,8 @@
; source IL file
; these are comments
; line 2 comment
; line 3 comment
output prg,sys ; create a c-64 program with basic SYS call to launch it
;clobberzp restore ; clobber over the zp memory normally used by basic/kernel rom, frees up more zp
@ -8,6 +11,8 @@ output prg,sys ; create a c-64 program with basic SYS call to launch it
~main $0a00
{
; this is the main block with the start routine.
memory screen = $d021
memory border = $d020
memory cursor = 646
@ -126,6 +131,8 @@ somelabel1
return
}
; comments before block 4
~ block4 {
A=1
A=2
@ -158,3 +165,7 @@ subroutine
return ,,$99
}
; comment at end
; another one