assem

2024-09-07 03:54:27 +00:00 · 2021-01-05 22:56:52 +01:00 · 2021-01-05 22:56:52 +01:00 · 76101d7f8d
commit 76101d7f8d
parent 7d6a0ab256
6 changed files with 450 additions and 217 deletions
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -3,6 +3,8 @@ TODO
 ====
 - move all str* builtin functions to a strings library module, mem* to the sys module. update docs.
 - use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
 - optimize pointer access code @(pointer)? use a subroutine? macro?  65c02 vs 6502?
 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
 - detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
--- a/examples/cx16/assembler/Makefile
+++ b/examples/cx16/assembler/Makefile
@ -0,0 +1,8 @@
 # Default target: build both generated files.
 all: perfecthash.c opcodes.asm
 # perfecthash.c is gperf's output for the mnemonic list printed by
 # "gen_opcodes.py --mnemlist"; it is rebuilt whenever gen_opcodes.py changes.
 perfecthash.c:  gen_opcodes.py
 	python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c
 # opcodes.asm is the stdout of "gen_opcodes.py --parser"; it is rebuilt
 # whenever gen_opcodes.py changes.
 opcodes.asm:  gen_opcodes.py
 	python gen_opcodes.py --parser > opcodes.asm
--- a/examples/cx16/assembler/assem.p8
+++ b/examples/cx16/assembler/assem.p8
@ -61,12 +61,8 @@ textparse {
            return
        }
-        uword value = parse_number(word_addrs[2])
+        uword value = conv.any2uword(word_addrs[2])
        if strcmp("*", word_addrs[0])==0 {
            if value == $ffff {
                txt.print("?invalid address\n")
                return
            }
            program_counter = value
        } else {
            set_symbol(word_addrs[0], value)
@ -151,19 +147,6 @@ textparse {
                    emit(lsb(cx16.r0))
                    emit(msb(cx16.r0))
                }
                repeat 2-num_operand_bytes {
                    txt.print("   ")
                }
                txt.chrout(' ')
                txt.print(word_addrs[0])
                if word_addrs[1] {
                    txt.chrout(' ')
                    txt.print(word_addrs[1])
                }
                if word_addrs[2] {
                    txt.chrout(' ')
                    txt.print(word_addrs[2])
                }
                txt.chrout('\n')
            }
        } else {
@ -207,7 +190,6 @@ textparse {
        ; -- returns true/false success status,  the value is in cx16.r0 if successful
        ; TODO number parsing error detection
        ; TODO optimize this (coalesce various parsing options)
        ; TODO fix number parsing by ending the number with \0 after the last digit
        when addr_mode {
            instructions.am_Imp, instructions.am_Acc -> {
@ -216,49 +198,76 @@ textparse {
            }
            instructions.am_Imm -> {
                ; lda #$12
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_Zp, instructions.am_Zpr -> {
+            instructions.am_Zp -> {
-                ; lda  $02 / brr0 $12,label
+                ; lda  $02
-                cx16.r0 = parse_number(operand_ptr)
+                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Zpr -> {
                ; brr0 $12,label
                ; TODO parse the label, relative offset
                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_ZpX, instructions.am_ZpY -> {
                ; lda $02,x / lda $02,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/y
                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Rel -> {
-                cx16.r0 = parse_number(operand_ptr)
+                ; bcc  $c000
                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                ; TODO calculate relative offset to current program counter
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Abs -> {
                ; jmp $1234
-                cx16.r0 = parse_number(operand_ptr)
+                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_AbsX, instructions.am_AbsY -> {
                ; sta $3000,x / sta $3000,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/,y
                terminate_number(operand_ptr)
                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Ind  -> {
                ; jmp ($fffc)
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_IzX, instructions.am_IzY, instructions.am_Izp, instructions.am_IaX  -> {
+            instructions.am_IzX, instructions.am_IzY, instructions.am_IaX  -> {
-                ; lda ($02,x) / lda ($02),y / lda ($02) / jmp ($a000,x)
+                ; lda ($02,x) / lda ($02),y / jmp ($a000,x)
-                cx16.r0 = parse_number(operand_ptr+1)
+                ; TODO parse the ,x/,y
                terminate_number(operand_ptr+1)
                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
            instructions.am_Izp  -> {
                ; lda ($02)
                terminate_number(operand_ptr+1)
                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
@ -275,14 +284,20 @@ textparse {
        }
    }
-
+    sub terminate_number(uword strptr) {
-    sub parse_number(uword strptr) -> uword {
+        ; replace the first terminating character after a number (such as a , or close parens)
-        ; TODO move to conv module and optimize
+        ;  with a 0 to terminate the number and make the parse routine happy.
-        if @(strptr)=='$'
+        ; TODO remove this once the various conv routines are more robust and stop at a non-digit
-            return conv.hex2uword(strptr)
+        repeat {
-        if @(strptr)=='%'
+            when @(strptr) {
-            return conv.bin2uword(strptr)
+                0 -> return
-        return conv.str2uword(strptr)
+                ',', ')', ' ', 9, '\n' -> {
                    @(strptr) = 0
                    return
                }
            }
            strptr++
        }
    }
    sub split_input() {
@ -442,8 +457,9 @@ instructions {
            'a' -> {
                if @(operand_ptr+1) == 0
                    return am_Acc
-                ; some expression TODO
+                ; some expression
-                return am_Invalid
+                ; zp or absolute depends on the value of the symbol referenced
                return am_Invalid       ; TODO
            }
            '#' -> {
                if @(operand_ptr+1)
@ -452,24 +468,28 @@ instructions {
            }
            '(' -> {
                ; some indirect TODO
                ; can be (zp), (zp,x), (zp),y, (abs), (abs,x)
                if @(operand_ptr+1)
                    return am_Ind
                return am_Invalid
            }
            '$' -> {
                ; hex address TODO
                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '%' -> {
                ; bin address TODO
                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
                ; absolute or indexed address TODO
                ; can be followed by ,x or ,y
                return am_Abs
            }
        }
--- a/examples/cx16/assembler/gen_opcodes.py
+++ b/examples/cx16/assembler/gen_opcodes.py
@ -1,3 +1,4 @@
 import sys
 from collections import Counter
 from enum import IntEnum
@ -299,15 +300,15 @@ for ins in Instructions:
    else:
        InstructionsByMode[ins[2]].append((ins[1], ins[0]))
 # build the name->modes table
-print("; generated by opcodes.py")
+def generate_mnemonics_parser():
-print("; addressing modes:")
+    print("; generated by opcodes.py")
-for mode in AddrMode:
+    print("; addressing modes:")
    for mode in AddrMode:
        print(";", mode.value, "=", mode.name)
-print()
+    print()
-print("""
+    print("""
        .enc "petscii"  ;define an ascii to petscii encoding
        .cdef " @", 32  ;characters
        .cdef "AZ", $c1
@ -315,9 +316,9 @@ print("""
        .cdef "[[", $5b
        .cdef "]]", $5d
        .edef "<nothing>", [];replace with no bytes
-""")
+    """)
-for instr in sorted(InstructionsByName.items()):
+    for instr in sorted(InstructionsByName.items()):
        print("i_" + instr[0] + ":\n\t.byte  ", end="")
        if len(instr[1]) == 1:
            # many instructions have just 1 addressing mode, save space for those
@ -334,8 +335,7 @@ for instr in sorted(InstructionsByName.items()):
            print(",".join(str(o) for o in mode_opcodes), end="")
            print()
-
+    def determine_mnemonics():
 def determine_mnemonics():
        mnemonics = list(sorted(set(ins[1] for ins in Instructions)))
        # opcodes histogram (ordered by occurrence)  (in kernal + basic roms of the c64):
@ -383,32 +383,26 @@ def determine_mnemonics():
            raise ValueError("mnem count mismatch")
        return mnem2
    mnemonics = determine_mnemonics()
-mnemonics = determine_mnemonics()
+    def first_letters():
 def first_letters():
        firstletters = {m[0]: 0 for m in mnemonics}
        return firstletters.keys()
-
+    def second_letters(firstletter):
 def second_letters(firstletter):
        secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter}
        return secondletters.keys()
-
+    def third_letters(firstletter, secondletter):
 def third_letters(firstletter, secondletter):
        thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter}
        return thirdletters.keys()
-
+    def fourth_letters(firstletter, secondletter, thirdletter):
 def fourth_letters(firstletter, secondletter, thirdletter):
        longmnem = [m for m in mnemonics if len(m) > 3]
        fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter}
        return fourthletters.keys()
-
+    def make_tree():
 def make_tree():
        tree = {}
        for first in first_letters():
            tree[first] = {
@ -423,14 +417,12 @@ def make_tree():
            }
        return tree
    tree = make_tree()
-tree = make_tree()
+    print("get_opcode_info    .proc")
-
+    print("_mnem_fourth_letter = cx16.r4")
-
+    print("_mnem_fifth_letter = cx16.r5")
-print("get_opcode_info    .proc")
+    for first in tree:
 print("_mnem_fourth_letter = cx16.r4")
 print("_mnem_fifth_letter = cx16.r5")
 for first in tree:
        print("    cmp  #'%s'" % first)
        print("    bne  _not_%s" % first)
        for second in tree[first]:
@ -453,17 +445,17 @@ for first in tree:
                print("_not_%s%s%s:" % (first, second, third))
            print("_not_%s%s:" % (first, second))
        print("_not_%s:" % first)
-print("_invalid:")
+    print("_invalid:")
-print("    lda  #0")
+    print("    lda  #0")
-print("    ldy  #0")
+    print("    ldy  #0")
-print("    rts")
+    print("    rts")
-# the 4-letter mnemonics are:
+    # the 4-letter mnemonics are:
-# smb[0-7]
+    # smb[0-7]
-# bbr[0-7]
+    # bbr[0-7]
-# rmb[0-7]
+    # rmb[0-7]
-# bbs[0-7]
+    # bbs[0-7]
-for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
+    for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        print("_check_%s" % fourlettermnemonic)
        print("    lda  #<_tab_%s" % fourlettermnemonic)
        print("    ldy  #>_tab_%s" % fourlettermnemonic)
@ -471,7 +463,7 @@ for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        sty  P8ZP_SCRATCH_W2+1    
        bra  _check4""")
-print("""_check4
+    print("""_check4
        lda  _mnem_fourth_letter
        cmp  #'0'
        bcc  _invalid
@ -488,9 +480,23 @@ print("""_check4
        pla
        rts""")
-for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
+    for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
        print("_tab_%s" % fourlettermnemonic)
        for ii in "01234567":
            print("    .word   i_%s%s" % (fourlettermnemonic, ii))
-print("    .pend")
+    print("    .pend")
 def generate_mnem_list():
    """Print each key of InstructionsByName in upper case, one per line.

    The keys are sorted before they are upper-cased, so the output order is
    the sort order of the names as they are stored in the table.
    """
    uppercased = (name.upper() for name in sorted(InstructionsByName))
    for mnemonic in uppercased:
        print(mnemonic)
 if __name__=="__main__":
    # Command-line entry point. Exactly one mode flag is expected:
    #   --mnemlist  print the list of mnemonics (generate_mnem_list)
    #   --parser    print the mnemonics parser source (generate_mnemonics_parser)
    # A missing argument is treated the same as an unknown one instead of
    # crashing with an IndexError on sys.argv[1].
    mode = sys.argv[1] if len(sys.argv) > 1 else None
    if mode == "--mnemlist":
        generate_mnem_list()
    elif mode == "--parser":
        generate_mnemonics_parser()
    else:
        # The generated text goes to stdout, which the accompanying Makefile
        # redirects into the output files. Report the error on stderr and exit
        # non-zero so a bad invocation cannot silently produce a bogus file.
        print("invalid arg", file=sys.stderr)
        sys.exit(1)
--- a/examples/cx16/assembler/hashes.py
+++ b/examples/cx16/assembler/hashes.py
@ -0,0 +1,17 @@
 import re
 # Dump the slot table of the perfect hash in perfecthash.c as "index name"
 # lines, printing "-" for empty slots.

 # Read the whole C source; the context manager closes the file handle
 # instead of leaving that to the garbage collector.
 with open("perfecthash.c", "rt") as source_file:
    hashcode = source_file.read()

 # The table initialiser is the text between the first "{" and the next "}"
 # that follow the first occurrence of the word "wordlist".
 entries = hashcode.split("wordlist")[1].split("{")[1].split("}")[0].strip().split(",")

 max_hash_match = re.search(r"MAX_HASH_VALUE = (\d+)", hashcode)
 if max_hash_match is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise ValueError("MAX_HASH_VALUE not found in perfecthash.c")
 max_hash_value = int(max_hash_match.group(1))

 # The table must have one slot for every hash value 0..MAX_HASH_VALUE.
 if len(entries) != max_hash_value+1:
    raise ValueError("inconsistent number of entries parsed")

 entries = [e.strip() for e in entries]
 # An entry ending in the character 0 is taken to be an empty slot
 # (presumably a null pointer, given gperf's --null-strings option - confirm
 # against the generated file); anything else is a quoted keyword.
 entries = [None if e.endswith('0') else e.strip('"') for e in entries]

 for ix, entry in enumerate(entries):
    print(ix, entry or "-")
--- a/examples/cx16/assembler/perfecthash.py
+++ b/examples/cx16/assembler/perfecthash.py
@ -0,0 +1,180 @@
 # Parameters of the perfect hash over the assembler mnemonics.
 # NOTE(review): these look like the constants gperf emits alongside its
 # generated hash function - confirm against perfecthash.c.
 TOTAL_KEYWORDS = 98
 # in_word_set() only accepts strings of 3 or 4 characters.
 MIN_WORD_LENGTH = 3
 MAX_WORD_LENGTH = 4
 # Index of the first occupied slot in wordlist.
 MIN_HASH_VALUE = 2
 # Index of the last slot in wordlist; in_word_set() rejects larger hash values.
 MAX_HASH_VALUE = 134
 def hash(string: str, length: int) -> int:
    """Return the perfect-hash value of a candidate mnemonic.

    The value is the sum of per-character association values for the first
    up to four characters of ``string``; the second character is looked up
    one position further into the table than its code point.

    Characters whose code point lies beyond the table (anything outside
    7-bit ASCII) get the table's filler value 135, so the result for such
    input is larger than any valid slot index instead of raising IndexError.

    Note: the name shadows the builtin ``hash``; it is kept because callers
    refer to it by this name.

    :param string: the text to hash; must contain at least ``min(length, 4)``
        characters and at least one character.
    :param length: number of characters of ``string`` to take into account.
    :return: the hash value (a slot index when ``string`` is a known mnemonic).
    :raises IndexError: if ``string`` is shorter than described above.
    """
    asso_values = [
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135,  65,  62,
       61,  58,  57,  54,  47,  46, 135, 135, 135, 135,
      135, 135, 135, 135, 135,  26,   4,   1,   2,  33,
        2, 135, 135,  15,  69,   4,  30,  10,  52,  17,
        3,  34,  13,   0,   5,  29,   7,  69,  18,   6,
       53, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
      135, 135, 135, 135, 135, 135, 135, 135, 135 ]
    # Same filler value the table itself uses for its unused positions.
    filler = 135

    def assoc(index: int) -> int:
        # Indices past the end of the table behave like any other filler entry.
        return asso_values[index] if index < len(asso_values) else filler

    hval = 0
    if length > 3:
        hval += assoc(ord(string[3]))
    if length > 2:
        hval += assoc(ord(string[2]))
    if length > 1:
        # The second character deliberately uses the next table position.
        hval += assoc(ord(string[1])+1)
    hval += assoc(ord(string[0]))
    return hval
 # Slot table of the perfect hash: in_word_set() indexes it with the value
 # returned by hash(). A slot holds the mnemonic that hashes to that index,
 # or None when no mnemonic does.
 wordlist = [
    None,
    None,
    "SBC",
    "SEC",
    "SED",
    "DEC",
    "BCS",
    "BCC",
    "BRK",
    "TRB",
    "DEY",
    "TXS",
    "CLC",
    "CLD",
    "TSB",
    "TAY",
    "PLP",
    "SEI",
    "CLV",
    "PLY",
    None,
    "PHP",
    "DEX",
    None,
    "PHY",
    None,
    "CLI",
    "TAX",
    "TSX",
    "ROR",
    "BRA",
    "PLX",
    "STP",
    "INC",
    None,
    "STY",
    "PHX",
    "TXA",
    "INY",
    "PLA",
    "BEQ",
    "CPY",
    "RTS",
    "ORA",
    "PHA",
    "AND",
    "ROL",
    "STX",
    "LSR",
    "EOR",
    "INX",
    "BBS7",
    "BBS6",
    "CPX",
    "BNE",
    "STA",
    "CMP",
    "RTI",
    "NOP",
    "BBS5",
    "ADC",
    "ASL",
    "BBS4",
    "BBS3",
    "BBR7",
    "BBR6",
    "BBS2",
    "BBS1",
    "BPL",
    "LDY",
    "BBS0",
    "BMI",
    "BBR5",
    "BVS",
    "BVC",
    "BBR4",
    "BBR3",
    None,
    "BIT",
    "BBR2",
    "BBR1",
    "LDX",
    "STZ",
    "BBR0",
    "TYA",
    None,
    None,
    "JSR",
    "WAI",
    "LDA",
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    None,
    "SMB7",
    "SMB6",
    None,
    None,
    None,
    None,
    None,
    None,
    "SMB5",
    None,
    None,
    "SMB4",
    "SMB3",
    "RMB7",
    "RMB6",
    "SMB2",
    "SMB1",
    None,
    None,
    "SMB0",
    None,
    "RMB5",
    "JMP",
    None,
    "RMB4",
    "RMB3",
    None,
    None,
    "RMB2",
    "RMB1",
    None,
    None,
    "RMB0"
    ]
 def in_word_set(string: str) -> bool:
    """Tell whether ``string`` is one of the mnemonics stored in ``wordlist``.

    The string is hashed with ``hash()`` and compared with the entry in the
    slot it hashes to. The comparison is exact, so case matters.

    :param string: candidate mnemonic.
    :return: True if ``string`` equals the entry in its hash slot, otherwise
        False. The result is always a real bool, including for empty slots.
    """
    length = len(string)
    # Only strings within the keyword length bounds can possibly match.
    if not MIN_WORD_LENGTH <= length <= MAX_WORD_LENGTH:
        return False
    key = hash(string, length)
    if key > MAX_HASH_VALUE:
        return False
    # An empty slot holds None, which never equals a str, so the comparison
    # alone yields False there (rather than leaking None to the caller).
    return wordlist[key] == string