From 76101d7f8dd254b944e5ac5fdfed9385e971da9e Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Tue, 5 Jan 2021 22:56:52 +0100 Subject: [PATCH] assem --- docs/source/todo.rst | 2 + examples/cx16/assembler/Makefile | 8 + examples/cx16/assembler/assem.p8 | 102 ++++--- examples/cx16/assembler/gen_opcodes.py | 358 +++++++++++++------------ examples/cx16/assembler/hashes.py | 17 ++ examples/cx16/assembler/perfecthash.py | 180 +++++++++++++ 6 files changed, 450 insertions(+), 217 deletions(-) create mode 100644 examples/cx16/assembler/Makefile create mode 100644 examples/cx16/assembler/hashes.py create mode 100644 examples/cx16/assembler/perfecthash.py diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 693c3982e..2e829a984 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -3,6 +3,8 @@ TODO ==== - move all str* builtin functions to a strings library module, mem* to the sys module. update docs. +- use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y +- optimize pointer access code @(pointer)? use a subroutine? macro? 65c02 vs 6502? - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50 - detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine) diff --git a/examples/cx16/assembler/Makefile b/examples/cx16/assembler/Makefile new file mode 100644 index 000000000..1c3a152cf --- /dev/null +++ b/examples/cx16/assembler/Makefile @@ -0,0 +1,8 @@ +all: perfecthash.c opcodes.asm + +perfecthash.c: gen_opcodes.py + python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c + +opcodes.asm: gen_opcodes.py + python gen_opcodes.py --parser > opcodes.asm + diff --git a/examples/cx16/assembler/assem.p8 b/examples/cx16/assembler/assem.p8 index b3700d898..22c06a628 100644 --- a/examples/cx16/assembler/assem.p8 +++ b/examples/cx16/assembler/assem.p8 @@ -61,12 +61,8 @@ textparse { return } - uword value = parse_number(word_addrs[2]) + uword value = conv.any2uword(word_addrs[2]) if strcmp("*", word_addrs[0])==0 { - if value == $ffff { - txt.print("?invalid address\n") - return - } program_counter = value } else { set_symbol(word_addrs[0], value) @@ -151,19 +147,6 @@ textparse { emit(lsb(cx16.r0)) emit(msb(cx16.r0)) } - repeat 2-num_operand_bytes { - txt.print(" ") - } - txt.chrout(' ') - txt.print(word_addrs[0]) - if word_addrs[1] { - txt.chrout(' ') - txt.print(word_addrs[1]) - } - if word_addrs[2] { - txt.chrout(' ') - txt.print(word_addrs[2]) - } txt.chrout('\n') } } else { @@ -207,7 +190,6 @@ textparse { ; -- returns true/false success status, the value is in cx16.r0 if succesful ; TODO number parsing error detection ; TODO optimize this (coalesce various parsing options) - ; TODO fix number parsing by ending the number with \0 after the last digit when addr_mode { instructions.am_Imp, instructions.am_Acc -> { @@ -216,49 +198,76 @@ textparse { } instructions.am_Imm -> { ; lda #$12 - cx16.r0 = parse_number(operand_ptr+1) + terminate_number(operand_ptr+1) + cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true } - instructions.am_Zp, instructions.am_Zpr -> { - ; lda $02 / brr0 $12,label - cx16.r0 = parse_number(operand_ptr) + instructions.am_Zp -> { + ; lda $02 + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) + debug_print_value(operand_ptr) + return true + } + instructions.am_Zpr -> { + ; brr0 $12,label + ; TODO parse the label, relative offset + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_ZpX, instructions.am_ZpY -> { ; lda $02,x / lda $02,y - cx16.r0 = parse_number(operand_ptr) + ; TODO parse the ,x/y + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_Rel -> { - cx16.r0 = parse_number(operand_ptr) + ; bcc $c000 + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) ; TODO calcualate relative offset to current programcounter debug_print_value(operand_ptr) return true } instructions.am_Abs -> { ; jmp $1234 - cx16.r0 = parse_number(operand_ptr) + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_AbsX, instructions.am_AbsY -> { ; sta $3000,x / sta $3000,y - cx16.r0 = parse_number(operand_ptr) + ; TODO parse the ,x/,y + terminate_number(operand_ptr) + cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_Ind -> { ; jmp ($fffc) - cx16.r0 = parse_number(operand_ptr+1) + terminate_number(operand_ptr+1) + cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true } - instructions.am_IzX, instructions.am_IzY, instructions.am_Izp, instructions.am_IaX -> { - ; lda ($02,x) / lda ($02),y / lda ($02) / jmp ($a000,x) - cx16.r0 = parse_number(operand_ptr+1) + instructions.am_IzX, instructions.am_IzY, instructions.am_IaX -> { + ; lda ($02,x) / lda ($02),y / jmp ($a000,x) + ; TODO parse the ,x/,y + terminate_number(operand_ptr+1) + cx16.r0 = conv.any2uword(operand_ptr+1) + debug_print_value(operand_ptr+1) + return true + } + instructions.am_Izp -> { + ; lda ($02) + terminate_number(operand_ptr+1) + cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true } @@ -275,14 +284,20 @@ textparse { } } - - sub parse_number(uword strptr) -> uword { - ; TODO move to conv module and optimize - if @(strptr)=='$' - return conv.hex2uword(strptr) - if @(strptr)=='%' - return conv.bin2uword(strptr) - return conv.str2uword(strptr) + sub terminate_number(uword strptr) { + ; replace the first terminating character after a number (such as a , or close parens) + ; with a 0 to terminate the number and make the parse routine happy. + ; TODO remove this once the various conv routines are more robust and stop at a non-digit + repeat { + when @(strptr) { + 0 -> return + ',', ')', ' ', 9, '\n' -> { + @(strptr) = 0 + return + } + } + strptr++ + } } sub split_input() { @@ -442,8 +457,9 @@ instructions { 'a' -> { if @(operand_ptr+1) == 0 return am_Acc - ; some expression TODO - return am_Invalid + ; some expression + ; zp or absolute depends on the value of the symbol referenced + return am_Invalid ; TODO } '#' -> { if @(operand_ptr+1) @@ -452,24 +468,28 @@ instructions { } '(' -> { ; some indirect TODO + ; can be (zp), (zp,x), (zp),y, (abs), (abs,x) if @(operand_ptr+1) return am_Ind return am_Invalid } '$' -> { ; hex address TODO + ; can be followed by ,x or ,y if @(operand_ptr+1) return am_Abs return am_Invalid } '%' -> { ; bin address TODO + ; can be followed by ,x or ,y if @(operand_ptr+1) return am_Abs return am_Invalid } '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { ; absolute or indexed address TODO + ; can be followed by ,x or ,y return am_Abs } } diff --git a/examples/cx16/assembler/gen_opcodes.py b/examples/cx16/assembler/gen_opcodes.py index c91705bab..eae72d2e3 100644 --- a/examples/cx16/assembler/gen_opcodes.py +++ b/examples/cx16/assembler/gen_opcodes.py @@ -1,3 +1,4 @@ +import sys from collections import Counter from enum import IntEnum @@ -299,198 +300,203 @@ for ins in Instructions: else: InstructionsByMode[ins[2]].append((ins[1], ins[0])) -# build the name->modes table -print("; generated by opcodes.py") -print("; addressing modes:") -for mode in AddrMode: - print(";", mode.value, "=", mode.name) -print() +def generate_mnemonics_parser(): + print("; generated by opcodes.py") + print("; addressing modes:") + for mode in AddrMode: + print(";", mode.value, "=", mode.name) + print() -print(""" - .enc "petscii" ;define an ascii to petscii encoding - .cdef " @", 32 ;characters - .cdef "AZ", $c1 - .cdef "az", $41 - .cdef "[[", $5b - .cdef "]]", $5d - .edef "", [];replace with no bytes -""") + print(""" + .enc "petscii" ;define an ascii to petscii encoding + .cdef " @", 32 ;characters + .cdef "AZ", $c1 + .cdef "az", $41 + .cdef "[[", $5b + .cdef "]]", $5d + .edef "", [];replace with no bytes + """) -for instr in sorted(InstructionsByName.items()): - print("i_" + instr[0] + ":\n\t.byte ", end="") - if len(instr[1]) == 1: - # many instructions have just 1 addressing mode, save space for those - info = instr[1].popitem() - print("1,", info[0].value,",", info[1]) - else: - print("0, ", end='') - mode_opcodes = [] - for mode in AddrMode: - if mode in instr[1]: - mode_opcodes.append(instr[1][mode]) - else: - mode_opcodes.append(0) - print(",".join(str(o) for o in mode_opcodes), end="") - print() + for instr in sorted(InstructionsByName.items()): + print("i_" + instr[0] + ":\n\t.byte ", end="") + if len(instr[1]) == 1: + # many instructions have just 1 addressing mode, save space for those + info = instr[1].popitem() + print("1,", info[0].value,",", info[1]) + else: + print("0, ", end='') + mode_opcodes = [] + for mode in AddrMode: + if mode in instr[1]: + mode_opcodes.append(instr[1][mode]) + else: + mode_opcodes.append(0) + print(",".join(str(o) for o in mode_opcodes), end="") + print() + def determine_mnemonics(): + mnemonics = list(sorted(set(ins[1] for ins in Instructions))) -def determine_mnemonics(): - mnemonics = list(sorted(set(ins[1] for ins in Instructions))) + # opcodes histogram (ordered by occurrence) (in kernal + basic roms of the c64): + opcode_occurrences = [ + (32, 839), (133, 502), (165, 488), (0, 429), (208, 426), (169, 390), (76, 324), (240, 322), (2, 314), (160, 245), + (96, 228), (3, 201), (1, 191), (255, 186), (144, 182), (170, 175), (162, 169), (177, 165), (104, 159), (164, 158), + (132, 157), (201, 156), (72, 151), (141, 150), (200, 146), (173, 144), (166, 139), (176, 139), (16, 138), + (134, 138), (73, 127), (24, 119), (101, 113), (69, 109), (13, 107), (34, 104), (145, 103), (4, 102), (168, 101), + (221, 98), (230, 93), (48, 91), (189, 87), (41, 86), (6, 86), (9, 86), (8, 85), (79, 85), (138, 80), (10, 80), + (7, 79), (185, 77), (56, 75), (44, 75), (78, 74), (105, 73), (5, 73), (174, 73), (220, 71), (198, 69), (232, 69), + (36, 69), (202, 67), (152, 67), (95, 67), (100, 65), (102, 65), (247, 65), (188, 64), (136, 64), (84, 64), + (122, 62), (128, 61), (80, 61), (186, 60), (82, 59), (97, 58), (15, 57), (70, 57), (229, 56), (19, 55), (40, 54), + (183, 54), (65, 54), (233, 53), (180, 53), (12, 53), (171, 53), (197, 53), (83, 52), (248, 52), (112, 51), + (237, 51), (89, 50), (11, 50), (158, 50), (74, 49), (224, 48), (20, 47), (238, 47), (108, 46), (234, 46), + (251, 46), (254, 46), (184, 45), (14, 44), (163, 44), (226, 43), (211, 43), (88, 43), (98, 42), (17, 42), + (153, 42), (243, 41), (228, 41), (99, 41), (253, 41), (209, 41), (187, 39), (123, 39), (67, 39), (196, 38), + (68, 38), (35, 38), (172, 38), (175, 38), (161, 38), (85, 38), (191, 37), (113, 37), (182, 37), (151, 37), + (71, 36), (181, 35), (214, 35), (121, 35), (157, 35), (178, 35), (77, 35), (42, 34), (212, 33), (18, 33), + (127, 33), (241, 33), (21, 33), (249, 32), (23, 31), (245, 30), (142, 30), (55, 29), (140, 29), (46, 29), + (192, 29), (179, 29), (252, 29), (115, 29), (22, 29), (43, 28), (215, 28), (45, 28), (246, 28), (38, 28), + (86, 27), (225, 27), (25, 26), (239, 26), (58, 26), (167, 26), (147, 26), (217, 26), (149, 25), (30, 25), + (206, 25), (28, 24), (47, 24), (37, 24), (155, 24), (129, 23), (148, 23), (111, 23), (29, 23), (39, 23), + (51, 22), (193, 22), (236, 22), (120, 22), (64, 22), (204, 21), (210, 21), (244, 21), (52, 21), (66, 21), + (114, 20), (250, 20), (106, 20), (93, 19), (199, 19), (218, 19), (154, 19), (205, 19), (50, 19), (159, 19), + (194, 19), (49, 19), (190, 19), (103, 18), (216, 18), (213, 18), (107, 18), (131, 18), (63, 18), (94, 18), + (91, 17), (242, 17), (109, 17), (53, 16), (227, 16), (139, 16), (31, 16), (75, 16), (60, 16), (195, 15), + (231, 15), (62, 15), (59, 15), (87, 14), (207, 14), (27, 14), (90, 14), (110, 13), (223, 13), (57, 13), + (118, 12), (26, 12), (203, 12), (81, 12), (156, 12), (54, 12), (235, 12), (146, 11), (135, 11), (126, 11), + (150, 11), (130, 11), (143, 10), (61, 10), (219, 10), (124, 9), (222, 9), (125, 9), (119, 7), (137, 7), + (33, 7), (117, 5), (92, 4), (116, 3) + ] - # opcodes histogram (ordered by occurrence) (in kernal + basic roms of the c64): - opcode_occurrences = [ - (32, 839), (133, 502), (165, 488), (0, 429), (208, 426), (169, 390), (76, 324), (240, 322), (2, 314), (160, 245), - (96, 228), (3, 201), (1, 191), (255, 186), (144, 182), (170, 175), (162, 169), (177, 165), (104, 159), (164, 158), - (132, 157), (201, 156), (72, 151), (141, 150), (200, 146), (173, 144), (166, 139), (176, 139), (16, 138), - (134, 138), (73, 127), (24, 119), (101, 113), (69, 109), (13, 107), (34, 104), (145, 103), (4, 102), (168, 101), - (221, 98), (230, 93), (48, 91), (189, 87), (41, 86), (6, 86), (9, 86), (8, 85), (79, 85), (138, 80), (10, 80), - (7, 79), (185, 77), (56, 75), (44, 75), (78, 74), (105, 73), (5, 73), (174, 73), (220, 71), (198, 69), (232, 69), - (36, 69), (202, 67), (152, 67), (95, 67), (100, 65), (102, 65), (247, 65), (188, 64), (136, 64), (84, 64), - (122, 62), (128, 61), (80, 61), (186, 60), (82, 59), (97, 58), (15, 57), (70, 57), (229, 56), (19, 55), (40, 54), - (183, 54), (65, 54), (233, 53), (180, 53), (12, 53), (171, 53), (197, 53), (83, 52), (248, 52), (112, 51), - (237, 51), (89, 50), (11, 50), (158, 50), (74, 49), (224, 48), (20, 47), (238, 47), (108, 46), (234, 46), - (251, 46), (254, 46), (184, 45), (14, 44), (163, 44), (226, 43), (211, 43), (88, 43), (98, 42), (17, 42), - (153, 42), (243, 41), (228, 41), (99, 41), (253, 41), (209, 41), (187, 39), (123, 39), (67, 39), (196, 38), - (68, 38), (35, 38), (172, 38), (175, 38), (161, 38), (85, 38), (191, 37), (113, 37), (182, 37), (151, 37), - (71, 36), (181, 35), (214, 35), (121, 35), (157, 35), (178, 35), (77, 35), (42, 34), (212, 33), (18, 33), - (127, 33), (241, 33), (21, 33), (249, 32), (23, 31), (245, 30), (142, 30), (55, 29), (140, 29), (46, 29), - (192, 29), (179, 29), (252, 29), (115, 29), (22, 29), (43, 28), (215, 28), (45, 28), (246, 28), (38, 28), - (86, 27), (225, 27), (25, 26), (239, 26), (58, 26), (167, 26), (147, 26), (217, 26), (149, 25), (30, 25), - (206, 25), (28, 24), (47, 24), (37, 24), (155, 24), (129, 23), (148, 23), (111, 23), (29, 23), (39, 23), - (51, 22), (193, 22), (236, 22), (120, 22), (64, 22), (204, 21), (210, 21), (244, 21), (52, 21), (66, 21), - (114, 20), (250, 20), (106, 20), (93, 19), (199, 19), (218, 19), (154, 19), (205, 19), (50, 19), (159, 19), - (194, 19), (49, 19), (190, 19), (103, 18), (216, 18), (213, 18), (107, 18), (131, 18), (63, 18), (94, 18), - (91, 17), (242, 17), (109, 17), (53, 16), (227, 16), (139, 16), (31, 16), (75, 16), (60, 16), (195, 15), - (231, 15), (62, 15), (59, 15), (87, 14), (207, 14), (27, 14), (90, 14), (110, 13), (223, 13), (57, 13), - (118, 12), (26, 12), (203, 12), (81, 12), (156, 12), (54, 12), (235, 12), (146, 11), (135, 11), (126, 11), - (150, 11), (130, 11), (143, 10), (61, 10), (219, 10), (124, 9), (222, 9), (125, 9), (119, 7), (137, 7), - (33, 7), (117, 5), (92, 4), (116, 3) - ] + cnt = Counter() + for opcode, amount in opcode_occurrences: + cnt[AllInstructions[opcode][1]] += amount + cnt["nop"] = 13 + cnt["tsb"] = 13 - cnt = Counter() - for opcode, amount in opcode_occurrences: - cnt[AllInstructions[opcode][1]] += amount - cnt["nop"] = 13 - cnt["tsb"] = 13 + four_letter_mnemonics = list(sorted([ins[1] for ins in AllInstructions if len(ins[1])>3])) + for ins4 in four_letter_mnemonics: + del cnt[ins4] + cnt[ins4] = 1 + mnem2 = [c[0] for c in cnt.most_common()] + if len(mnem2)!=len(mnemonics): + raise ValueError("mnem count mismatch") + return mnem2 - four_letter_mnemonics = list(sorted([ins[1] for ins in AllInstructions if len(ins[1])>3])) - for ins4 in four_letter_mnemonics: - del cnt[ins4] - cnt[ins4] = 1 - mnem2 = [c[0] for c in cnt.most_common()] - if len(mnem2)!=len(mnemonics): - raise ValueError("mnem count mismatch") - return mnem2 + mnemonics = determine_mnemonics() + def first_letters(): + firstletters = {m[0]: 0 for m in mnemonics} + return firstletters.keys() -mnemonics = determine_mnemonics() + def second_letters(firstletter): + secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter} + return secondletters.keys() + def third_letters(firstletter, secondletter): + thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter} + return thirdletters.keys() -def first_letters(): - firstletters = {m[0]: 0 for m in mnemonics} - return firstletters.keys() + def fourth_letters(firstletter, secondletter, thirdletter): + longmnem = [m for m in mnemonics if len(m) > 3] + fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter} + return fourthletters.keys() - -def second_letters(firstletter): - secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter} - return secondletters.keys() - - -def third_letters(firstletter, secondletter): - thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter} - return thirdletters.keys() - - -def fourth_letters(firstletter, secondletter, thirdletter): - longmnem = [m for m in mnemonics if len(m) > 3] - fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter} - return fourthletters.keys() - - -def make_tree(): - tree = {} - for first in first_letters(): - tree[first] = { - secondletter: { - thirdletter: { - fourthletter: {} - for fourthletter in fourth_letters(first, secondletter, thirdletter) + def make_tree(): + tree = {} + for first in first_letters(): + tree[first] = { + secondletter: { + thirdletter: { + fourthletter: {} + for fourthletter in fourth_letters(first, secondletter, thirdletter) + } + for thirdletter in third_letters(first, secondletter) } - for thirdletter in third_letters(first, secondletter) + for secondletter in second_letters(first) } - for secondletter in second_letters(first) - } - return tree + return tree + + tree = make_tree() + + print("get_opcode_info .proc") + print("_mnem_fourth_letter = cx16.r4") + print("_mnem_fifth_letter = cx16.r5") + for first in tree: + print(" cmp #'%s'" % first) + print(" bne _not_%s" % first) + for second in tree[first]: + print(" cpx #'%s'" % second) + print(" bne _not_%s%s" % (first,second)) + for third in tree[first][second]: + print(" cpy #'%s'" % third) + print(" bne _not_%s%s%s" % (first, second, third)) + fourth = tree[first][second][third] + if fourth: + if "".join(fourth.keys()) != "01234567": + raise ValueError("fourth", fourth.keys()) + print(" bra _check_%s%s%s" % (first, second, third)) + else: + print(" lda _mnem_fourth_letter") # check that the fourth letter is not present + print(" bne _invalid") + print(" lda #i_%s%s%s" % (first, second, third)) + print(" rts") + print("_not_%s%s%s:" % (first, second, third)) + print("_not_%s%s:" % (first, second)) + print("_not_%s:" % first) + print("_invalid:") + print(" lda #0") + print(" ldy #0") + print(" rts") + + # the 4-letter mnemonics are: + # smb[0-7] + # bbr[0-7] + # rmb[0-7] + # bbs[0-7] + for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: + print("_check_%s" % fourlettermnemonic) + print(" lda #<_tab_%s" % fourlettermnemonic) + print(" ldy #>_tab_%s" % fourlettermnemonic) + print(""" sta P8ZP_SCRATCH_W2 + sty P8ZP_SCRATCH_W2+1 + bra _check4""") + + print("""_check4 + lda _mnem_fourth_letter + cmp #'0' + bcc _invalid + cmp #'8' + bcs _invalid + lda _mnem_fifth_letter ; must have no fifth letter + bne _invalid + tay + lda (P8ZP_SCRATCH_W2),y + pha + iny + lda (P8ZP_SCRATCH_W2),y + tay + pla + rts""") + + for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: + print("_tab_%s" % fourlettermnemonic) + for ii in "01234567": + print(" .word i_%s%s" % (fourlettermnemonic, ii)) + + print(" .pend") -tree = make_tree() +def generate_mnem_list(): + for m in sorted(InstructionsByName): + print(m.upper()) -print("get_opcode_info .proc") -print("_mnem_fourth_letter = cx16.r4") -print("_mnem_fifth_letter = cx16.r5") -for first in tree: - print(" cmp #'%s'" % first) - print(" bne _not_%s" % first) - for second in tree[first]: - print(" cpx #'%s'" % second) - print(" bne _not_%s%s" % (first,second)) - for third in tree[first][second]: - print(" cpy #'%s'" % third) - print(" bne _not_%s%s%s" % (first, second, third)) - fourth = tree[first][second][third] - if fourth: - if "".join(fourth.keys()) != "01234567": - raise ValueError("fourth", fourth.keys()) - print(" bra _check_%s%s%s" % (first, second, third)) - else: - print(" lda _mnem_fourth_letter") # check that the fourth letter is not present - print(" bne _invalid") - print(" lda #i_%s%s%s" % (first, second, third)) - print(" rts") - print("_not_%s%s%s:" % (first, second, third)) - print("_not_%s%s:" % (first, second)) - print("_not_%s:" % first) -print("_invalid:") -print(" lda #0") -print(" ldy #0") -print(" rts") - -# the 4-letter mnemonics are: -# smb[0-7] -# bbr[0-7] -# rmb[0-7] -# bbs[0-7] -for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: - print("_check_%s" % fourlettermnemonic) - print(" lda #<_tab_%s" % fourlettermnemonic) - print(" ldy #>_tab_%s" % fourlettermnemonic) - print(""" sta P8ZP_SCRATCH_W2 - sty P8ZP_SCRATCH_W2+1 - bra _check4""") - -print("""_check4 - lda _mnem_fourth_letter - cmp #'0' - bcc _invalid - cmp #'8' - bcs _invalid - lda _mnem_fifth_letter ; must have no fifth letter - bne _invalid - tay - lda (P8ZP_SCRATCH_W2),y - pha - iny - lda (P8ZP_SCRATCH_W2),y - tay - pla - rts""") - -for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: - print("_tab_%s" % fourlettermnemonic) - for ii in "01234567": - print(" .word i_%s%s" % (fourlettermnemonic, ii)) - -print(" .pend") +if __name__=="__main__": + if sys.argv[1]=="--mnemlist": + generate_mnem_list() + elif sys.argv[1]=="--parser": + generate_mnemonics_parser() + else: + print("invalid arg") diff --git a/examples/cx16/assembler/hashes.py b/examples/cx16/assembler/hashes.py new file mode 100644 index 000000000..84f2b7df3 --- /dev/null +++ b/examples/cx16/assembler/hashes.py @@ -0,0 +1,17 @@ +import re + +hashcode = open("perfecthash.c", "rt").read() + +entries = hashcode.split("wordlist")[1].split("{")[1].split("}")[0].strip().split(",") + +max_hash_value = int(re.search(r"MAX_HASH_VALUE = (\d+)", hashcode).group(1)) + +if len(entries) != max_hash_value+1: + raise ValueError("inconsistent number of entries parsed") + + +entries = [e.strip() for e in entries] +entries = [None if e.endswith('0') else e.strip('"') for e in entries] + +for ix, entry in enumerate(entries): + print(ix, entry or "-") diff --git a/examples/cx16/assembler/perfecthash.py b/examples/cx16/assembler/perfecthash.py new file mode 100644 index 000000000..fcc51aeeb --- /dev/null +++ b/examples/cx16/assembler/perfecthash.py @@ -0,0 +1,180 @@ +TOTAL_KEYWORDS = 98 +MIN_WORD_LENGTH = 3 +MAX_WORD_LENGTH = 4 +MIN_HASH_VALUE = 2 +MAX_HASH_VALUE = 134 + + +def hash(string: str, length: int) -> int: + asso_values = [ + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 65, 62, + 61, 58, 57, 54, 47, 46, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 26, 4, 1, 2, 33, + 2, 135, 135, 15, 69, 4, 30, 10, 52, 17, + 3, 34, 13, 0, 5, 29, 7, 69, 18, 6, + 53, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, + 135, 135, 135, 135, 135, 135, 135, 135, 135 ] + + hval = 0 + if length > 3: + hval += asso_values[ord(string[3])] + if length > 2: + hval += asso_values[ord(string[2])] + if length > 1: + hval += asso_values[ord(string[1])+1] + hval += asso_values[ord(string[0])] + return hval + + +wordlist = [ + None, + None, + "SBC", + "SEC", + "SED", + "DEC", + "BCS", + "BCC", + "BRK", + "TRB", + "DEY", + "TXS", + "CLC", + "CLD", + "TSB", + "TAY", + "PLP", + "SEI", + "CLV", + "PLY", + None, + "PHP", + "DEX", + None, + "PHY", + None, + "CLI", + "TAX", + "TSX", + "ROR", + "BRA", + "PLX", + "STP", + "INC", + None, + "STY", + "PHX", + "TXA", + "INY", + "PLA", + "BEQ", + "CPY", + "RTS", + "ORA", + "PHA", + "AND", + "ROL", + "STX", + "LSR", + "EOR", + "INX", + "BBS7", + "BBS6", + "CPX", + "BNE", + "STA", + "CMP", + "RTI", + "NOP", + "BBS5", + "ADC", + "ASL", + "BBS4", + "BBS3", + "BBR7", + "BBR6", + "BBS2", + "BBS1", + "BPL", + "LDY", + "BBS0", + "BMI", + "BBR5", + "BVS", + "BVC", + "BBR4", + "BBR3", + None, + "BIT", + "BBR2", + "BBR1", + "LDX", + "STZ", + "BBR0", + "TYA", + None, + None, + "JSR", + "WAI", + "LDA", + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + None, + "SMB7", + "SMB6", + None, + None, + None, + None, + None, + None, + "SMB5", + None, + None, + "SMB4", + "SMB3", + "RMB7", + "RMB6", + "SMB2", + "SMB1", + None, + None, + "SMB0", + None, + "RMB5", + "JMP", + None, + "RMB4", + "RMB3", + None, + None, + "RMB2", + "RMB1", + None, + None, + "RMB0" + ] + +def in_word_set(string: str) -> bool: + length = len(string) + if 3 <= length <= 4: + key = hash(string, length) + if key <= MAX_HASH_VALUE: + word = wordlist[key] + return word and word==string + return False