assem

2025-02-18 20:30:43 +00:00 · 2021-01-05 22:56:52 +01:00 · 2021-01-05 22:56:52 +01:00 · 76101d7f8d
commit 76101d7f8d
parent 7d6a0ab256
6 changed files with 450 additions and 217 deletions
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -3,6 +3,8 @@ TODO
 ====

 - move all str* builtin functions to a strings library module, mem* to the sys module. update docs.
+- use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
+- optimize pointer access code @(pointer)? use a subroutine? macro?  65c02 vs 6502?
 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
 - detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
--- a/examples/cx16/assembler/Makefile
+++ b/examples/cx16/assembler/Makefile
@ -0,0 +1,8 @@
+# Regenerates the files that are derived from gen_opcodes.py.
+# 'all' is only a convenience alias, not a real file: declare it phony so that
+# a stray file called 'all' in this directory can never disable the build.
+.PHONY: all
+all: perfecthash.c opcodes.asm
+
+# The mnemonic list printed by gen_opcodes.py is piped into gperf, which
+# writes the perfect-hash lookup code.
+perfecthash.c:  gen_opcodes.py
+	python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c
+
+# The assembly source of the mnemonic parser is printed by gen_opcodes.py itself.
+opcodes.asm:  gen_opcodes.py
+	python gen_opcodes.py --parser > opcodes.asm
+
--- a/examples/cx16/assembler/assem.p8
+++ b/examples/cx16/assembler/assem.p8
@ -61,12 +61,8 @@ textparse {
            return
        }

-        uword value = parse_number(word_addrs[2])
+        uword value = conv.any2uword(word_addrs[2])
        if strcmp("*", word_addrs[0])==0 {
-            if value == $ffff {
-                txt.print("?invalid address\n")
-                return
-            }
            program_counter = value
        } else {
            set_symbol(word_addrs[0], value)
@ -151,19 +147,6 @@ textparse {
                    emit(lsb(cx16.r0))
                    emit(msb(cx16.r0))
                }
-                repeat 2-num_operand_bytes {
-                    txt.print("   ")
-                }
-                txt.chrout(' ')
-                txt.print(word_addrs[0])
-                if word_addrs[1] {
-                    txt.chrout(' ')
-                    txt.print(word_addrs[1])
-                }
-                if word_addrs[2] {
-                    txt.chrout(' ')
-                    txt.print(word_addrs[2])
-                }
                txt.chrout('\n')
            }
        } else {
@ -207,7 +190,6 @@ textparse {
        ; -- returns true/false success status,  the value is in cx16.r0 if successful
        ; TODO number parsing error detection
        ; TODO optimize this (coalesce various parsing options)
-        ; TODO fix number parsing by ending the number with \0 after the last digit

        when addr_mode {
            instructions.am_Imp, instructions.am_Acc -> {
@ -216,49 +198,76 @@ textparse {
            }
            instructions.am_Imm -> {
                ; lda #$12
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_Zp, instructions.am_Zpr -> {
-                ; lda  $02 / brr0 $12,label
-                cx16.r0 = parse_number(operand_ptr)
+            instructions.am_Zp -> {
+                ; lda  $02
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
+                debug_print_value(operand_ptr)
+                return true
+            }
+            instructions.am_Zpr -> {
+                ; brr0 $12,label
+                ; TODO parse the label, relative offset
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_ZpX, instructions.am_ZpY -> {
                ; lda $02,x / lda $02,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/y
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Rel -> {
-                cx16.r0 = parse_number(operand_ptr)
+                ; bcc  $c000
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                ; TODO calculate relative offset to current program counter
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Abs -> {
                ; jmp $1234
-                cx16.r0 = parse_number(operand_ptr)
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_AbsX, instructions.am_AbsY -> {
                ; sta $3000,x / sta $3000,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/,y
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Ind  -> {
                ; jmp ($fffc)
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_IzX, instructions.am_IzY, instructions.am_Izp, instructions.am_IaX  -> {
-                ; lda ($02,x) / lda ($02),y / lda ($02) / jmp ($a000,x)
-                cx16.r0 = parse_number(operand_ptr+1)
+            instructions.am_IzX, instructions.am_IzY, instructions.am_IaX  -> {
+                ; lda ($02,x) / lda ($02),y / jmp ($a000,x)
+                ; TODO parse the ,x/,y
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
+                debug_print_value(operand_ptr+1)
+                return true
+            }
+            instructions.am_Izp  -> {
+                ; lda ($02)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
@ -275,14 +284,20 @@ textparse {
        }
    }

-
-    sub parse_number(uword strptr) -> uword {
-        ; TODO move to conv module and optimize
-        if @(strptr)=='$'
-            return conv.hex2uword(strptr)
-        if @(strptr)=='%'
-            return conv.bin2uword(strptr)
-        return conv.str2uword(strptr)
+    sub terminate_number(uword strptr) {
+        ; replace the first terminating character after a number (such as a , or close parens)
+        ;  with a 0 to terminate the number and make the parse routine happy.
+        ; NOTE: this modifies the string at strptr in place; whatever followed the
+        ;  terminator (for instance an ,x / ,y suffix) is no longer reachable through strptr.
+        ; A string that already ends with 0 before any terminator is left untouched.
+        ; TODO remove this once the various conv routines are more robust and stop at a non-digit
+        repeat {
+            when @(strptr) {
+                ; already at the end of the string: nothing to cut off
+                0 -> return
+                ; comma, close parens, space, 9 (presumably tab - confirm for the target charset), newline
+                ',', ')', ' ', 9, '\n' -> {
+                    @(strptr) = 0
+                    return
+                }
+            }
+            ; any other character is taken to be part of the number: keep scanning
+            strptr++
+        }
    }

    sub split_input() {
@ -442,8 +457,9 @@ instructions {
            'a' -> {
                if @(operand_ptr+1) == 0
                    return am_Acc
-                ; some expression TODO
-                return am_Invalid
+                ; some expression
+                ; zp or absolute depends on the value of the symbol referenced
+                return am_Invalid       ; TODO
            }
            '#' -> {
                if @(operand_ptr+1)
@ -452,24 +468,28 @@ instructions {
            }
            '(' -> {
                ; some indirect TODO
+                ; can be (zp), (zp,x), (zp),y, (abs), (abs,x)
                if @(operand_ptr+1)
                    return am_Ind
                return am_Invalid
            }
            '$' -> {
                ; hex address TODO
+                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '%' -> {
                ; bin address TODO
+                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
                ; absolute or indexed address TODO
+                ; can be followed by ,x or ,y
                return am_Abs
            }
        }
--- a/examples/cx16/assembler/gen_opcodes.py
+++ b/examples/cx16/assembler/gen_opcodes.py
@ -1,3 +1,4 @@
+import sys
 from collections import Counter
 from enum import IntEnum

@ -299,15 +300,15 @@ for ins in Instructions:
    else:
        InstructionsByMode[ins[2]].append((ins[1], ins[0]))

-# build the name->modes table

-print("; generated by opcodes.py")
-print("; addressing modes:")
-for mode in AddrMode:
+def generate_mnemonics_parser():
+    print("; generated by opcodes.py")
+    print("; addressing modes:")
+    for mode in AddrMode:
        print(";", mode.value, "=", mode.name)
-print()
+    print()

-print("""
+    print("""
        .enc "petscii"  ;define an ascii to petscii encoding
        .cdef " @", 32  ;characters
        .cdef "AZ", $c1
@ -315,9 +316,9 @@ print("""
        .cdef "[[", $5b
        .cdef "]]", $5d
        .edef "<nothing>", [];replace with no bytes
-""")
+    """)

-for instr in sorted(InstructionsByName.items()):
+    for instr in sorted(InstructionsByName.items()):
        print("i_" + instr[0] + ":\n\t.byte  ", end="")
        if len(instr[1]) == 1:
            # many instructions have just 1 addressing mode, save space for those
@ -334,8 +335,7 @@ for instr in sorted(InstructionsByName.items()):
            print(",".join(str(o) for o in mode_opcodes), end="")
            print()

-
-def determine_mnemonics():
+    def determine_mnemonics():
        mnemonics = list(sorted(set(ins[1] for ins in Instructions)))

        # opcodes histogram (ordered by occurrence)  (in kernal + basic roms of the c64):
@ -383,32 +383,26 @@ def determine_mnemonics():
            raise ValueError("mnem count mismatch")
        return mnem2

+    mnemonics = determine_mnemonics()

-mnemonics = determine_mnemonics()
-
-
-def first_letters():
+    def first_letters():
        firstletters = {m[0]: 0 for m in mnemonics}
        return firstletters.keys()

-
-def second_letters(firstletter):
+    def second_letters(firstletter):
        secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter}
        return secondletters.keys()

-
-def third_letters(firstletter, secondletter):
+    def third_letters(firstletter, secondletter):
        thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter}
        return thirdletters.keys()

-
-def fourth_letters(firstletter, secondletter, thirdletter):
+    def fourth_letters(firstletter, secondletter, thirdletter):
        longmnem = [m for m in mnemonics if len(m) > 3]
        fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter}
        return fourthletters.keys()

-
-def make_tree():
+    def make_tree():
        tree = {}
        for first in first_letters():
            tree[first] = {
@ -423,14 +417,12 @@ def make_tree():
            }
        return tree

+    tree = make_tree()

-tree = make_tree()
-
-
-print("get_opcode_info    .proc")
-print("_mnem_fourth_letter = cx16.r4")
-print("_mnem_fifth_letter = cx16.r5")
-for first in tree:
+    print("get_opcode_info    .proc")
+    print("_mnem_fourth_letter = cx16.r4")
+    print("_mnem_fifth_letter = cx16.r5")
+    for first in tree:
        print("    cmp  #'%s'" % first)
        print("    bne  _not_%s" % first)
        for second in tree[first]:
@ -453,17 +445,17 @@ for first in tree:
                print("_not_%s%s%s:" % (first, second, third))
            print("_not_%s%s:" % (first, second))
        print("_not_%s:" % first)
-print("_invalid:")
-print("    lda  #0")
-print("    ldy  #0")
-print("    rts")
+    print("_invalid:")
+    print("    lda  #0")
+    print("    ldy  #0")
+    print("    rts")

-# the 4-letter mnemonics are:
-# smb[0-7]
-# bbr[0-7]
-# rmb[0-7]
-# bbs[0-7]
-for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
+    # the 4-letter mnemonics are:
+    # smb[0-7]
+    # bbr[0-7]
+    # rmb[0-7]
+    # bbs[0-7]
+    for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        print("_check_%s" % fourlettermnemonic)
        print("    lda  #<_tab_%s" % fourlettermnemonic)
        print("    ldy  #>_tab_%s" % fourlettermnemonic)
@ -471,7 +463,7 @@ for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        sty  P8ZP_SCRATCH_W2+1    
        bra  _check4""")

-print("""_check4
+    print("""_check4
        lda  _mnem_fourth_letter
        cmp  #'0'
        bcc  _invalid
@ -488,9 +480,23 @@ print("""_check4
        pla
        rts""")

-for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
+    for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        print("_tab_%s" % fourlettermnemonic)
        for ii in "01234567":
            print("    .word   i_%s%s" % (fourlettermnemonic, ii))

-print("    .pend")
+    print("    .pend")
+
+
+def generate_mnem_list():
+    """Print every key of InstructionsByName in upper case, one per line.
+
+    The keys are sorted before they are upper-cased, so the output order
+    follows the keys as they are stored.
+    """
+    ordered_names = sorted(InstructionsByName)
+    for name in ordered_names:
+        print(name.upper())
+
+
+if __name__=="__main__":
+    # Exactly one mode flag selects what is written to stdout.
+    # A missing argument is handled like an unknown one instead of crashing
+    # with an IndexError on sys.argv[1].
+    mode = sys.argv[1] if len(sys.argv) > 1 else None
+    if mode=="--mnemlist":
+        generate_mnem_list()
+    elif mode=="--parser":
+        generate_mnemonics_parser()
+    else:
+        # The Makefile redirects stdout into the generated files, so the
+        # diagnostic goes to stderr, and the non-zero exit status makes the
+        # build stop instead of silently producing a bogus output file.
+        print("invalid arg; expected --mnemlist or --parser", file=sys.stderr)
+        sys.exit(1)
--- a/examples/cx16/assembler/hashes.py
+++ b/examples/cx16/assembler/hashes.py
@ -0,0 +1,17 @@
+import re
+
+# Load the gperf output that the Makefile writes to perfecthash.c.
+# The context manager closes the file as soon as it has been read.
+with open("perfecthash.c", "rt") as source:
+    hashcode = source.read()
+
+# Take the text between the first '{' and the next '}' that follow the first
+# occurrence of "wordlist", and split it into its comma separated slots.
+entries = hashcode.split("wordlist")[1].split("{")[1].split("}")[0].strip().split(",")
+
+max_hash_match = re.search(r"MAX_HASH_VALUE = (\d+)", hashcode)
+if max_hash_match is None:
+    # Fail with a clear message rather than an AttributeError on None.group
+    raise ValueError("MAX_HASH_VALUE not found in perfecthash.c")
+max_hash_value = int(max_hash_match.group(1))
+
+# There must be one slot for every hash value 0..MAX_HASH_VALUE.
+if len(entries) != max_hash_value+1:
+    raise ValueError("inconsistent number of entries parsed")
+
+
+entries = [e.strip() for e in entries]
+# A slot whose text ends in 0 is treated as empty (presumably the null pointer
+# that gperf --null-strings emits - confirm against the generated file);
+# every other slot is a quoted keyword, which ends in '"' even when the
+# keyword itself ends in the digit 0.
+entries = [None if e.endswith('0') else e.strip('"') for e in entries]
+
+# Print "<hash value> <keyword>", with "-" for an empty slot.
+for ix, entry in enumerate(entries):
+    print(ix, entry or "-")
--- a/examples/cx16/assembler/perfecthash.py
+++ b/examples/cx16/assembler/perfecthash.py
@ -0,0 +1,180 @@
+# Parameters of the perfect hash below (presumably carried over from the
+# gperf-generated perfecthash.c - confirm when regenerating).
+# Number of non-None entries in wordlist.
+TOTAL_KEYWORDS = 98
+# Shortest and longest keyword length; in_word_set rejects anything else.
+MIN_WORD_LENGTH = 3
+MAX_WORD_LENGTH = 4
+# Smallest hash value of any keyword (index of the first used wordlist slot).
+MIN_HASH_VALUE = 2
+# Largest hash value of any keyword (index of the last wordlist slot).
+MAX_HASH_VALUE = 134
+
+
+def hash(string: str, length: int) -> int:
+    """Return the perfect-hash value of a mnemonic.
+
+    The value is the sum of one table entry per character position below
+    ``length`` (at most the first four characters take part).  The entry
+    for the second character is read one slot further along the table than
+    its character code.
+
+    Note that this function shadows the builtin ``hash`` inside this module.
+
+    :param string: the word to hash; must hold at least one character and at
+        least ``length`` characters for the positions that are read.
+    :param length: number of characters of ``string`` to take into account
+        (in_word_set passes ``len(string)``).
+    :return: the sum of the selected table entries.
+    :raises IndexError: if a character code falls outside the table, or the
+        string is shorter than the positions that are read.
+    """
+    asso_values = [
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135,  65,  62,
+       61,  58,  57,  54,  47,  46, 135, 135, 135, 135,
+      135, 135, 135, 135, 135,  26,   4,   1,   2,  33,
+        2, 135, 135,  15,  69,   4,  30,  10,  52,  17,
+        3,  34,  13,   0,   5,  29,   7,  69,  18,   6,
+       53, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135 ]
+
+    # (character position, table index bias), listed in the order in which
+    # the lookups are performed: fourth, third, then second character.
+    optional_lookups = ((3, 0), (2, 0), (1, 1))
+
+    total = 0
+    for position, bias in optional_lookups:
+        if length > position:
+            total += asso_values[ord(string[position]) + bias]
+    # The first character always takes part.
+    total += asso_values[ord(string[0])]
+    return total
+
+
+# Keyword table, indexed by the value that hash() returns for the keyword.
+# None marks a slot that no keyword hashes to; in_word_set compares the
+# candidate against the slot to decide whether it is a known mnemonic.
+wordlist = [
+    None,
+    None,
+    "SBC",
+    "SEC",
+    "SED",
+    "DEC",
+    "BCS",
+    "BCC",
+    "BRK",
+    "TRB",
+    "DEY",
+    "TXS",
+    "CLC",
+    "CLD",
+    "TSB",
+    "TAY",
+    "PLP",
+    "SEI",
+    "CLV",
+    "PLY",
+    None,
+    "PHP",
+    "DEX",
+    None,
+    "PHY",
+    None,
+    "CLI",
+    "TAX",
+    "TSX",
+    "ROR",
+    "BRA",
+    "PLX",
+    "STP",
+    "INC",
+    None,
+    "STY",
+    "PHX",
+    "TXA",
+    "INY",
+    "PLA",
+    "BEQ",
+    "CPY",
+    "RTS",
+    "ORA",
+    "PHA",
+    "AND",
+    "ROL",
+    "STX",
+    "LSR",
+    "EOR",
+    "INX",
+    "BBS7",
+    "BBS6",
+    "CPX",
+    "BNE",
+    "STA",
+    "CMP",
+    "RTI",
+    "NOP",
+    "BBS5",
+    "ADC",
+    "ASL",
+    "BBS4",
+    "BBS3",
+    "BBR7",
+    "BBR6",
+    "BBS2",
+    "BBS1",
+    "BPL",
+    "LDY",
+    "BBS0",
+    "BMI",
+    "BBR5",
+    "BVS",
+    "BVC",
+    "BBR4",
+    "BBR3",
+    None,
+    "BIT",
+    "BBR2",
+    "BBR1",
+    "LDX",
+    "STZ",
+    "BBR0",
+    "TYA",
+    None,
+    None,
+    "JSR",
+    "WAI",
+    "LDA",
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    "SMB7",
+    "SMB6",
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    "SMB5",
+    None,
+    None,
+    "SMB4",
+    "SMB3",
+    "RMB7",
+    "RMB6",
+    "SMB2",
+    "SMB1",
+    None,
+    None,
+    "SMB0",
+    None,
+    "RMB5",
+    "JMP",
+    None,
+    "RMB4",
+    "RMB3",
+    None,
+    None,
+    "RMB2",
+    "RMB1",
+    None,
+    None,
+    "RMB0"
+    ]
+
+def in_word_set(string: str) -> bool:
+    """Tell whether ``string`` is one of the keywords stored in wordlist.
+
+    The comparison is exact, so the keyword must be given in the same
+    (upper) case as the table entries.
+
+    :param string: the candidate mnemonic.
+    :return: True when the slot selected by the hash of ``string`` holds
+        exactly that string, False otherwise.  The result is always a real
+        bool, also for an empty slot.
+    """
+    length = len(string)
+    if not MIN_WORD_LENGTH <= length <= MAX_WORD_LENGTH:
+        return False
+    # The lookup table inside hash() only covers 7-bit character codes; any
+    # other character cannot be part of a keyword, so answer False instead of
+    # letting hash() fail with an IndexError.
+    if any(ord(char) > 127 for char in string):
+        return False
+    key = hash(string, length)
+    if key > MAX_HASH_VALUE:
+        return False
+    # An unused slot holds None, which never compares equal to a string.
+    return wordlist[key] == string