# Reconstructed from the git patch "[PATCH] v2"
# (commit 67a71071421c727fb2d5b3600bb83e463d966dea, Kelvin Sherlock,
# Fri, 9 Aug 2019 18:28:32 -0400), which adds asm.py and string_compiler.py.
#
# NOTE(review): the patch text had its line breaks and indentation collapsed.
# Block nesting below is inferred from the statements themselves, and runs of
# whitespace inside string literals may have been squeezed to a single space
# -- confirm both against the original commit.

# ------------------------------------------------------------------ asm.py --

from textwrap import indent, dedent


class Block(object):
    """A run of emitted lines, optionally terminated by a `bne` to a label."""

    def __init__(self):
        self.size = 0          # byte size of everything in `instr`
        self.bne = None        # label of the trailing conditional branch, if any
        self.bne_long = False  # True once the branch needs the 5-byte long form
        self.labels = []       # labels that name the start of this block
        self.instr = []        # output lines, each already prefixed with a tab
        self.rts = False       # True when this block was produced by rts()

    def empty(self):
        """Return True when the block has no sized code and no branch.

        Zero-size entries (for example directives emitted with size 0) and
        labels do not make a block non-empty.
        """
        return self.size == 0 and self.bne is None


class Assembler(object):
    """Collects instructions into blocks and writes them out as one segment.

    Branch targets stay symbolic; addresses are only estimated in
    reify_branches() to choose between the short and the long branch form.
    """

    def __init__(self, name):
        self.name = name
        self.blocks = []
        self.b = None      # block currently being filled
        self.label = 0     # counter behind reserve_label()
        self.new_block()

    def reserve_label(self):
        """Return a fresh label name of the form `_<n>`."""
        self.label += 1
        return "_%d" % (self.label)

    def emit_label(self, l):
        """Attach label `l` to the start of a block.

        A new block is opened first unless the current one is still empty, so
        several labels emitted in a row end up on the same block.
        """
        if not self.b.empty():
            self.new_block()
        self.b.labels.append(l)

    def rts(self):
        """Emit `rts` into a block that holds no other sized code."""
        if not self.b.empty():
            self.new_block()
        self.emit("rts", 1)
        self.b.rts = True
        self.new_block()

    def new_block(self):
        """Open a fresh block and make it the current one."""
        self.b = Block()
        self.blocks.append(self.b)

    def bne(self, l):
        """End the current block with a conditional branch to label `l`."""
        self.b.bne = l
        self.new_block()

    def emit(self, op, size):
        """Append line `op` to the current block, accounting `size` bytes."""
        self.b.size += size
        self.b.instr.append("\t" + op)

    def merge_rts(self):
        """Collapse runs of adjacent rts blocks into the first one.

        The surviving block inherits the labels of the blocks it absorbs.
        An absorbed block can also hold zero-size lines ahead of its `rts`
        (generate_asm emits "longa on" that way); those are carried over so
        they still reach the output, placed after the surviving `rts`.
        Dropping them would silently change what the assembler sees for the
        code that follows.
        """
        kept = []
        prev = None
        for b in self.blocks:
            if b.rts and prev and prev.rts:
                prev.labels.extend(b.labels)
                # rts() always appends `rts` last; everything before it in an
                # rts block was emitted with size 0.
                prev.instr.extend(b.instr[:-1])
                continue
            kept.append(b)
            prev = b

        self.blocks = kept

    def merge_labels(self):
        """Keep one label per block and retarget branches to it."""
        canonical = {}

        for b in self.blocks:
            if b.labels:
                first = b.labels[0]
                for l in b.labels:
                    canonical[l] = first

        for b in self.blocks:
            if b.bne:
                b.bne = canonical[b.bne]
            if len(b.labels) > 1:
                b.labels = b.labels[0:1]

    def reify_branches(self):
        """Choose short or long form for every branch and append it.

        A short branch is accounted as 2 bytes (`bne`), a long one as 5 bytes
        (`beq *+5` followed by `brl`).  Every branch starts out short; a
        branch whose estimated distance does not fit is promoted and the
        layout is re-checked until nothing changes.
        """
        # in practice all branches are forward
        # could be clever and try to find a backwards rts branch island
        pc = 0
        addr = {}

        for b in self.blocks:
            for l in b.labels:
                addr[l] = pc

            pc += b.size
            if b.bne:
                pc += 2  # optimist

        changed = True
        while changed:
            pc = 0
            changed = False

            for b in self.blocks:
                pc += b.size
                l = b.bne
                if not l:
                    continue

                if b.bne_long:
                    pc += 5
                    continue

                target = addr[l]
                # NOTE(review): measured from pc+1 as in the original; for a
                # forward branch this is one byte pessimistic -- confirm
                # whether pc+2 (end of the 2-byte branch) was intended.
                diff = target - (pc + 1)
                pc += 2

                if diff < -128 or diff > 127:
                    changed = True
                    b.bne_long = True

                    # Everything at or after the end of this branch moves up
                    # by the 3 extra bytes of the long form.
                    for x in addr:
                        if addr[x] >= pc:
                            addr[x] += 3
                    # Keep the running address in step with the shifted
                    # labels; otherwise later branches in this pass look 3
                    # bytes further from their targets than they are and get
                    # promoted for no reason.
                    pc += 3

        for b in self.blocks:
            l = b.bne
            if not l:
                continue
            if b.bne_long:
                b.instr.append("\tbeq *+5")
                b.instr.append("\tbrl " + l)
                b.size += 5
            else:
                b.instr.append("\tbne " + l)
                b.size += 2

    def finish(self, io):
        """Optimize the collected blocks, write the segment to `io`, reset.

        `io` only needs a write(str) method.
        """
        self.b = None
        self.merge_rts()
        self.merge_labels()
        self.reify_branches()

        self.header(io)
        for b in self.blocks:
            for l in b.labels:
                io.write(l + "\tanop\n")
            for i in b.instr:
                io.write(i + "\n")
        self.footer(io)

        self.blocks = []
        self.new_block()

    def header(self, io):
        """Write the segment start, the fixed prologue and the `_action` label."""
        io.write("\t case on\n")
        io.write(self.name + "\tSTART\n\n")
        io.write("cp\tequ 5\n")

        # dedent() strips the common indentation, so only the relative layout
        # of this literal matters.
        txt = """
            phb
            tsc
            tcd
            phd
            pei cp+1
            plb
            plb
            jsr _action
            rep #$20
            lda 1
            sta 5
            lda 3
            sta 7
            pld
            pla
            pla
            txa
            plb
            rtl
        """
        io.write(indent(dedent(txt), "\t"))
        io.write("\n_action\tanop\n")
        io.write("\tldx #0\n\n")

    def footer(self, io):
        """Write the segment terminator."""
        io.write("\tEND\n")


# ------------------------------------------------------ string_compiler.py --

import getopt
import sys
import re

try:
    from asm import Assembler
except ImportError:
    # In this combined listing asm.py's Assembler is defined above, so a
    # missing `asm` module is not an error.
    pass

flag_ci = False  # set by the -i option: match case-insensitively


def printf(fmt, *args):
    """Print `fmt % args` followed by a newline."""
    print(fmt % args)


def c_encode(c):
    """Backslash-escape a backslash or single quote; return anything else as is."""
    # Explicit comparison: substring membership would also match "".
    if c in ("\\", "'"):
        return "\\" + c
    return c


def str_encode(s):
    """Return `s` with its characters escaped by c_encode and in reverse order."""
    return "".join(reversed([c_encode(x) for x in s]))


def str_xx(s):
    """Return two-digit hex codes of the characters of `s`, last character first."""
    return "".join(reversed(["%02x" % (ord(x)) for x in s]))
# NOTE(review): reconstructed from patch text whose line breaks and
# indentation were collapsed.  Nesting is inferred from the statements
# (in particular, the `if flag_ci:` tests are taken to sit inside
# `if count > 0:`), and runs of whitespace inside string literals may have
# been squeezed to one space -- confirm against the original commit.


def generate_c(d, level, preserve):
    """Print C-like matching code for trie node `d`.

    d        -- dict mapping 1- or 2-character chunks to child nodes; the key
                "" holds the value to return when the string ends here.
    level    -- depth in the trie; the input is read at offset level*2.
    preserve -- unused; kept for the existing recursive call signature (its
                only use was a commented-out declaration of `c`).
    """
    pad = " " * level

    double = [x for x in d if len(x) == 2]
    single = [x for x in d if len(x) == 1]

    count = len(d)
    if "" in d:
        count -= 1

    if count > 0:
        if double:
            printf("%s c = *(unsigned *)(cp+%d);", pad, level*2)
        else:
            printf("%s c = *(unsigned char *)(cp+%d);", pad, level*2)

        if flag_ci:
            if double:
                printf("%s c |= 0x2020;", pad)
            else:
                printf("%s c |= 0x20;", pad)

        for k in double:
            printf("%s if (c=='%s'){", pad, str_encode(k))
            generate_c(d[k], level+1, count > 1)
            printf("%s }", pad)

        if single:
            printf("%s c &= 0xff;", pad)
        for k in single:
            printf("%s if (c=='%s'){", pad, str_encode(k))
            generate_c(d[k], level+1, count > 1)
            printf("%s }", pad)

    rv = d[""] if "" in d else 0
    # Terminate the statement like every other line this function prints.
    printf("%s return %d;", pad, rv)


def generate_asm(asm, d, level):
    """Emit assembly for trie node `d` through `asm`.

    Two-character chunks are compared first, then one-character chunks after
    `sep #$20`.  Each comparison branches past its subtree to a reserved
    label on mismatch.  When the node has a "" entry its value is loaded with
    `ldx`; every node ends with asm.rts().

    asm   -- object providing emit(), bne(), reserve_label(), emit_label()
             and rts(), as Assembler does.
    d     -- dict mapping 1- or 2-character chunks to child nodes.
    level -- depth in the trie; the input is read at offset level*2.
    """
    double = [x for x in d if len(x) == 2]
    single = [x for x in d if len(x) == 1]
    # True when only one-character chunks are compared at this node.
    short_m = bool(single) and not double

    count = len(d)
    if "" in d:
        count -= 1

    if count > 0:
        if short_m:
            asm.emit("longa off", 0)
            asm.emit("sep #$20", 2)
        if level > 0:
            asm.emit("ldy #{}".format(level * 2), 3)
            asm.emit("lda (cp),y", 2)
        else:
            asm.emit("lda (cp)", 2)

        if flag_ci:
            if short_m:
                asm.emit("ora #$20", 2)
            else:
                asm.emit("ora #$2020", 3)

    for k in double:
        skip = asm.reserve_label()
        asm.emit("cmp #${}\t; {}".format(str_xx(k), k), 3)
        asm.bne(skip)
        generate_asm(asm, d[k], level+1)
        asm.emit_label(skip)

    if single and double:
        # Switch to one-character comparisons for the remaining chunks.
        asm.emit("longa off", 0)
        asm.emit("sep #$20", 2)
        short_m = True

    for k in single:
        skip = asm.reserve_label()
        asm.emit("cmp #${}\t; {}".format(str_xx(k), k), 2)
        asm.bne(skip)
        generate_asm(asm, d[k], level+1)
        asm.emit_label(skip)

    if short_m:
        asm.emit("longa on", 0)
    if "" in d:
        asm.emit("ldx #{}".format(d[""]), 3)
    asm.rts()


def process(data, name):
    """Build a trie of two-character chunks from `data` and write its assembly.

    data -- dict mapping each string to its integer value.
    name -- segment name handed to Assembler.

    Output goes to sys.stdout.
    """
    tree = {}
    for k, value in data.items():
        # Two characters per level; an odd-length key ends in a 1-char chunk.
        chunks = [k[i:i+2] for i in range(0, len(k), 2)]

        node = tree
        for x in chunks:
            node = node.setdefault(x, {})

        node[""] = value

    asm = Assembler(name)
    generate_asm(asm, tree, 0)
    asm.finish(sys.stdout)


def usage(ex=1):
    """Print the usage line and exit with status `ex`."""
    print("Usage: string_compiler [-i] name [file]")
    sys.exit(ex)


def read_data(f, name):
    """Parse lines of the form `"string" number` from `f` into a dict.

    Blank lines and lines starting with `#` are skipped.  With flag_ci set,
    every character of the key has bit 0x20 set before it is stored.

    f    -- iterable of text lines.
    name -- source name used in error messages.

    Raises ValueError for a malformed line or a repeated string.
    """
    data = {}
    for ln, line in enumerate(f, 1):
        line = line.strip()
        if line == "" or line[0] == "#":
            continue

        m = re.match(r'^"([^"]*)"\s+(\d+)$', line)
        if not m:
            raise ValueError("{}:{}: Bad data: {}".format(name, ln, line))

        k = m.group(1)
        if flag_ci:
            # Same transformation the generated code applies (ora #$20), not
            # str.lower(): non-letters are affected as well.
            k = "".join([chr(ord(x) | 0x20) for x in k])

        v = int(m.group(2))

        if k in data:
            raise ValueError("{}:{}: Duplicate string: {}".format(name, ln, k))

        data[k] = v

    return data


def read_stdin():
    """Read string data from standard input."""
    return read_data(sys.stdin, "")


def read_file(path):
    """Read string data from the file at `path`."""
    with open(path) as f:
        return read_data(f, path)


def main():
    """Command-line entry point: string_compiler [-i] name [file]."""
    global flag_ci

    argv = sys.argv[1:]
    try:
        opts, args = getopt.getopt(argv, "i")
    except getopt.GetoptError as err:
        # getopt raises for unknown options; report it and show the usage
        # line instead of dying with a traceback.
        print(err, file=sys.stderr)
        usage()

    for k, v in opts:
        if k == "-i":
            flag_ci = True
        else:
            usage()

    if len(args) < 1 or len(args) > 2:
        usage()

    name = args[0]

    if len(args) == 1 or args[1] == "-":
        data = read_stdin()
    else:
        data = read_file(args[1])

    process(data, name)

    sys.exit(0)


if __name__ == "__main__":
    main()