From 07cad750c9a7a089a4299588c30bf4f3f86e7794 Mon Sep 17 00:00:00 2001 From: Kelvin Sherlock Date: Fri, 9 Aug 2019 18:52:20 -0400 Subject: [PATCH] improved lowercase matching algorithm. only ora #$20 IF the target character is lowercase. order the comparisons so accumulator can be successively modified. worst case, 2 loads are needed per word "##" -> load, no ora "A#" -> ora $0020 "#A" -> reload, ora #$2000 "AA" -> ora $#2020 --- string_compiler.py | 53 +++++++++++++++++++++++++++++++++++++++------- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/string_compiler.py b/string_compiler.py index 701c64b..86ab3f6 100644 --- a/string_compiler.py +++ b/string_compiler.py @@ -1,6 +1,7 @@ import getopt import sys import re +from functools import reduce from asm import Assembler @@ -58,6 +59,34 @@ def generate_c(d, level, preserve): printf("%s return %d", indent, rv) +def str_to_int(cc): + fn = lambda x, y: (x << 8) + ord(y) + return reduce(fn, reversed(cc), 0) + +def str_to_print(cc): + return "".join([x if x.isprintable() else "." for x in cc]) + +def or_mask(cc): + fn = lambda x, y: (x << 8) + (0x20 * y.islower()) + return reduce(fn, reversed(cc), 0) + +def load_char(asm, dirty, level, short_m, old, new): + + if old & ~new: dirty = True + if dirty: + if level == 0: + asm.emit("lda (cp)", 2) + else: + asm.emit("lda (cp),y", 2) + old = 0 + + if old == new: return new + + if short_m: asm.emit("ora #${:02x}".format(new), 2) + else: asm.emit("ora #${:04x}".format(new), 3) + return new + + def generate_asm(asm, d, level): global flag_ci @@ -65,6 +94,14 @@ def generate_asm(asm, d, level): single = [x for x in d.keys() if len(x) == 1] short_m = single and not double + mask = 0 + if flag_ci: + single.sort(key = or_mask) + double.sort(key = or_mask) + if len(single): mask = or_mask(single[0]) + if len(double): mask = or_mask(double[0]) + + count = len(d) if "" in d: count = count - 1 @@ -74,17 +111,14 @@ def generate_asm(asm, d, level): asm.emit("sep #$20", 2) if level>0: asm.emit("ldy #{}".format(level * 2), 3) - asm.emit("lda (cp),y", 2) - else: asm.emit("lda (cp)", 2) - if flag_ci: - if short_m: asm.emit("ora #$20", 2) - else: asm.emit("ora #$2020", 3) + mask = load_char(asm, True, level, short_m, 0, mask) for k in double: dd = d[k] l = asm.reserve_label() - asm.emit("cmp #${}\t; {}".format(str_xx(k), k), 3) + mask = load_char(asm, False, level, short_m, mask, or_mask(k)) + asm.emit("cmp #${:04x}\t; '{}'".format(str_to_int(k), str_to_print(k)), 3) asm.bne(l) generate_asm(asm, dd, level+1) asm.emit_label(l) @@ -93,11 +127,13 @@ def generate_asm(asm, d, level): asm.emit("longa off", 0) asm.emit("sep #$20", 2) short_m = True + mask = mask & 0xff for k in single: dd = d[k] l = asm.reserve_label() - asm.emit("cmp #${}\t; {}".format(str_xx(k), k), 2) + mask = load_char(asm, False, level, short_m, mask, or_mask(k)) + asm.emit("cmp #${:02x}\t; '{}'".format(str_to_int(k), str_to_print(k)), 2) asm.bne(l) generate_asm(asm, dd, level+1) asm.emit_label(l) @@ -156,7 +192,8 @@ def read_data(f, name): k = m[1] # if flag_ci: k = k.lower() if flag_ci: - k = "".join([chr(ord(x)|0x20) for x in k]) + k = k.lower() + # k = "".join([chr(ord(x)|0x20) for x in k]) v = int(m[2])