assem

2025-02-18 20:30:43 +00:00 · 2021-01-05 22:56:52 +01:00 · 2021-01-05 22:56:52 +01:00 · 76101d7f8d
commit 76101d7f8d
parent 7d6a0ab256
6 changed files with 450 additions and 217 deletions
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -3,6 +3,8 @@ TODO
 ====

 - move all str* builtin functions to a strings library module, mem* to the sys module. update docs.
+- use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
+- optimize pointer access code @(pointer)? use a subroutine? macro?  65c02 vs 6502?
 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
 - detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
--- a/examples/cx16/assembler/Makefile
+++ b/examples/cx16/assembler/Makefile
@ -0,0 +1,8 @@
+# Regenerates the assembler's generated sources from gen_opcodes.py.
+# "all" names no real file, so it is declared phony: a stray file called
+# "all" in this directory must not make the target look up to date.
+.PHONY: all
+
+all: perfecthash.c opcodes.asm
+
+# Perfect-hash C source: the mnemonic list is piped through gperf.
+perfecthash.c:  gen_opcodes.py
+	python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c
+
+# Assembly source printed by "gen_opcodes.py --parser".
+opcodes.asm:  gen_opcodes.py
+	python gen_opcodes.py --parser > opcodes.asm
+
--- a/examples/cx16/assembler/assem.p8
+++ b/examples/cx16/assembler/assem.p8
@ -61,12 +61,8 @@ textparse {
            return
        }

-        uword value = parse_number(word_addrs[2])
+        uword value = conv.any2uword(word_addrs[2])
        if strcmp("*", word_addrs[0])==0 {
-            if value == $ffff {
-                txt.print("?invalid address\n")
-                return
-            }
            program_counter = value
        } else {
            set_symbol(word_addrs[0], value)
@ -151,19 +147,6 @@ textparse {
                    emit(lsb(cx16.r0))
                    emit(msb(cx16.r0))
                }
-                repeat 2-num_operand_bytes {
-                    txt.print("   ")
-                }
-                txt.chrout(' ')
-                txt.print(word_addrs[0])
-                if word_addrs[1] {
-                    txt.chrout(' ')
-                    txt.print(word_addrs[1])
-                }
-                if word_addrs[2] {
-                    txt.chrout(' ')
-                    txt.print(word_addrs[2])
-                }
                txt.chrout('\n')
            }
        } else {
@ -207,7 +190,6 @@ textparse {
        ; -- returns true/false success status,  the value is in cx16.r0 if succesful
        ; TODO number parsing error detection
        ; TODO optimize this (coalesce various parsing options)
-        ; TODO fix number parsing by ending the number with \0 after the last digit

        when addr_mode {
            instructions.am_Imp, instructions.am_Acc -> {
@ -216,49 +198,76 @@ textparse {
            }
            instructions.am_Imm -> {
                ; lda #$12
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_Zp, instructions.am_Zpr -> {
-                ; lda  $02 / brr0 $12,label
-                cx16.r0 = parse_number(operand_ptr)
+            instructions.am_Zp -> {
+                ; lda  $02
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
+                debug_print_value(operand_ptr)
+                return true
+            }
+            instructions.am_Zpr -> {
+                ; brr0 $12,label
+                ; TODO parse the label, relative offset
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_ZpX, instructions.am_ZpY -> {
                ; lda $02,x / lda $02,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/y
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Rel -> {
-                cx16.r0 = parse_number(operand_ptr)
+                ; bcc  $c000
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                ; TODO calcualate relative offset to current programcounter
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Abs -> {
                ; jmp $1234
-                cx16.r0 = parse_number(operand_ptr)
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_AbsX, instructions.am_AbsY -> {
                ; sta $3000,x / sta $3000,y
-                cx16.r0 = parse_number(operand_ptr)
+                ; TODO parse the ,x/,y
+                terminate_number(operand_ptr)
+                cx16.r0 = conv.any2uword(operand_ptr)
                debug_print_value(operand_ptr)
                return true
            }
            instructions.am_Ind  -> {
                ; jmp ($fffc)
-                cx16.r0 = parse_number(operand_ptr+1)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
-            instructions.am_IzX, instructions.am_IzY, instructions.am_Izp, instructions.am_IaX  -> {
-                ; lda ($02,x) / lda ($02),y / lda ($02) / jmp ($a000,x)
-                cx16.r0 = parse_number(operand_ptr+1)
+            instructions.am_IzX, instructions.am_IzY, instructions.am_IaX  -> {
+                ; lda ($02,x) / lda ($02),y / jmp ($a000,x)
+                ; TODO parse the ,x/,y
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
+                debug_print_value(operand_ptr+1)
+                return true
+            }
+            instructions.am_Izp  -> {
+                ; lda ($02)
+                terminate_number(operand_ptr+1)
+                cx16.r0 = conv.any2uword(operand_ptr+1)
                debug_print_value(operand_ptr+1)
                return true
            }
@ -275,14 +284,20 @@ textparse {
        }
    }

-
-    sub parse_number(uword strptr) -> uword {
-        ; TODO move to conv module and optimize
-        if @(strptr)=='$'
-            return conv.hex2uword(strptr)
-        if @(strptr)=='%'
-            return conv.bin2uword(strptr)
-        return conv.str2uword(strptr)
+    sub terminate_number(uword strptr) {
+        ; replace the first terminating character after a number (such as a , or close parens)
+        ;  with a 0 to terminate the number and make the parse routine happy.
+        ; TODO remove this once the various conv routines are more robust and stop at a non-digit
+        repeat {        ; scan one byte at a time; the only exits are the returns inside the when
+            when @(strptr) {
+                0 -> return        ; string already ends here: nothing to overwrite
+                ',', ')', ' ', 9, '\n' -> {        ; comma, close paren, space, 9 (presumably tab - confirm for the target charset) or newline
+                    @(strptr) = 0        ; overwrites the caller's buffer in place
+                    return
+                }
+            }
+            strptr++        ; no terminator at this position: advance to the next byte
+        }
    }

    sub split_input() {
@ -442,8 +457,9 @@ instructions {
            'a' -> {
                if @(operand_ptr+1) == 0
                    return am_Acc
-                ; some expression TODO
-                return am_Invalid
+                ; some expression
+                ; zp or absolute depends on the value of the symbol referenced
+                return am_Invalid       ; TODO
            }
            '#' -> {
                if @(operand_ptr+1)
@ -452,24 +468,28 @@ instructions {
            }
            '(' -> {
                ; some indirect TODO
+                ; can be (zp), (zp,x), (zp),y, (abs), (abs,x)
                if @(operand_ptr+1)
                    return am_Ind
                return am_Invalid
            }
            '$' -> {
                ; hex address TODO
+                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '%' -> {
                ; bin address TODO
+                ; can be followed by ,x or ,y
                if @(operand_ptr+1)
                    return am_Abs
                return am_Invalid
            }
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
                ; absolute or indexed address TODO
+                ; can be followed by ,x or ,y
                return am_Abs
            }
        }
--- a/examples/cx16/assembler/gen_opcodes.py
+++ b/examples/cx16/assembler/gen_opcodes.py
@ -1,3 +1,4 @@
+import sys
 from collections import Counter
 from enum import IntEnum

@ -299,8 +300,8 @@ for ins in Instructions:
    else:
        InstructionsByMode[ins[2]].append((ins[1], ins[0]))

-# build the name->modes table

+def generate_mnemonics_parser():
    print("; generated by opcodes.py")
    print("; addressing modes:")
    for mode in AddrMode:
@ -334,7 +335,6 @@ for instr in sorted(InstructionsByName.items()):
            print(",".join(str(o) for o in mode_opcodes), end="")
            print()

-
    def determine_mnemonics():
        mnemonics = list(sorted(set(ins[1] for ins in Instructions)))

@ -383,31 +383,25 @@ def determine_mnemonics():
            raise ValueError("mnem count mismatch")
        return mnem2

-
    mnemonics = determine_mnemonics()

-
    def first_letters():
        firstletters = {m[0]: 0 for m in mnemonics}
        return firstletters.keys()

-
    def second_letters(firstletter):
        secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter}
        return secondletters.keys()

-
    def third_letters(firstletter, secondletter):
        thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter}
        return thirdletters.keys()

-
    def fourth_letters(firstletter, secondletter, thirdletter):
        longmnem = [m for m in mnemonics if len(m) > 3]
        fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter}
        return fourthletters.keys()

-
    def make_tree():
        tree = {}
        for first in first_letters():
@ -423,10 +417,8 @@ def make_tree():
            }
        return tree

-
    tree = make_tree()

-
    print("get_opcode_info    .proc")
    print("_mnem_fourth_letter = cx16.r4")
    print("_mnem_fifth_letter = cx16.r5")
@ -494,3 +486,17 @@ for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
            print("    .word   i_%s%s" % (fourlettermnemonic, ii))

    print("    .pend")
+
+
+def generate_mnem_list():
+    for m in sorted(InstructionsByName):
+        print(m.upper())
+
+
+if __name__=="__main__":
+    if sys.argv[1]=="--mnemlist":
+        generate_mnem_list()
+    elif sys.argv[1]=="--parser":
+        generate_mnemonics_parser()
+    else:
+        print("invalid arg")
--- a/examples/cx16/assembler/hashes.py
+++ b/examples/cx16/assembler/hashes.py
@ -0,0 +1,17 @@
+import re
+
+hashcode = open("perfecthash.c", "rt").read()
+
+entries = hashcode.split("wordlist")[1].split("{")[1].split("}")[0].strip().split(",")
+
+max_hash_value = int(re.search(r"MAX_HASH_VALUE = (\d+)", hashcode).group(1))
+
+if len(entries) != max_hash_value+1:
+    raise ValueError("inconsistent number of entries parsed")
+
+
+entries = [e.strip() for e in entries]
+entries = [None if e.endswith('0') else e.strip('"') for e in entries]
+
+for ix, entry in enumerate(entries):
+    print(ix, entry or "-")
--- a/examples/cx16/assembler/perfecthash.py
+++ b/examples/cx16/assembler/perfecthash.py
@ -0,0 +1,180 @@
+TOTAL_KEYWORDS = 98  # number of non-None keyword strings in wordlist
+MIN_WORD_LENGTH = 3  # length of the shortest keyword
+MAX_WORD_LENGTH = 4  # length of the longest keyword
+MIN_HASH_VALUE = 2  # lowest wordlist index that holds a keyword
+MAX_HASH_VALUE = 134  # highest wordlist index (wordlist has MAX_HASH_VALUE+1 slots)
+
+
+def hash(string: str, length: int) -> int:  # table-driven hash over at most the first four characters; NOTE: shadows the builtin hash()
+    asso_values = [  # per-character weights indexed by character code; 129 entries, so larger codes raise IndexError
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135,  65,  62,
+       61,  58,  57,  54,  47,  46, 135, 135, 135, 135,
+      135, 135, 135, 135, 135,  26,   4,   1,   2,  33,
+        2, 135, 135,  15,  69,   4,  30,  10,  52,  17,
+        3,  34,  13,   0,   5,  29,   7,  69,  18,   6,
+       53, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
+      135, 135, 135, 135, 135, 135, 135, 135, 135 ]
+
+    hval = 0  # running sum of the selected weights
+    if length > 3:
+        hval += asso_values[ord(string[3])]  # fourth character, only present in 4-letter words
+    if length > 2:
+        hval += asso_values[ord(string[2])]  # third character
+    if length > 1:
+        hval += asso_values[ord(string[1])+1]  # second character is looked up one slot higher (the +1 offset)
+    hval += asso_values[ord(string[0])]  # first character always contributes
+    return hval  # a weight of 135 (filler value) pushes the sum above MAX_HASH_VALUE
+
+
+wordlist = [
+    None,
+    None,
+    "SBC",
+    "SEC",
+    "SED",
+    "DEC",
+    "BCS",
+    "BCC",
+    "BRK",
+    "TRB",
+    "DEY",
+    "TXS",
+    "CLC",
+    "CLD",
+    "TSB",
+    "TAY",
+    "PLP",
+    "SEI",
+    "CLV",
+    "PLY",
+    None,
+    "PHP",
+    "DEX",
+    None,
+    "PHY",
+    None,
+    "CLI",
+    "TAX",
+    "TSX",
+    "ROR",
+    "BRA",
+    "PLX",
+    "STP",
+    "INC",
+    None,
+    "STY",
+    "PHX",
+    "TXA",
+    "INY",
+    "PLA",
+    "BEQ",
+    "CPY",
+    "RTS",
+    "ORA",
+    "PHA",
+    "AND",
+    "ROL",
+    "STX",
+    "LSR",
+    "EOR",
+    "INX",
+    "BBS7",
+    "BBS6",
+    "CPX",
+    "BNE",
+    "STA",
+    "CMP",
+    "RTI",
+    "NOP",
+    "BBS5",
+    "ADC",
+    "ASL",
+    "BBS4",
+    "BBS3",
+    "BBR7",
+    "BBR6",
+    "BBS2",
+    "BBS1",
+    "BPL",
+    "LDY",
+    "BBS0",
+    "BMI",
+    "BBR5",
+    "BVS",
+    "BVC",
+    "BBR4",
+    "BBR3",
+    None,
+    "BIT",
+    "BBR2",
+    "BBR1",
+    "LDX",
+    "STZ",
+    "BBR0",
+    "TYA",
+    None,
+    None,
+    "JSR",
+    "WAI",
+    "LDA",
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    "SMB7",
+    "SMB6",
+    None,
+    None,
+    None,
+    None,
+    None,
+    None,
+    "SMB5",
+    None,
+    None,
+    "SMB4",
+    "SMB3",
+    "RMB7",
+    "RMB6",
+    "SMB2",
+    "SMB1",
+    None,
+    None,
+    "SMB0",
+    None,
+    "RMB5",
+    "JMP",
+    None,
+    "RMB4",
+    "RMB3",
+    None,
+    None,
+    "RMB2",
+    "RMB1",
+    None,
+    None,
+    "RMB0"
+    ]
+
+def in_word_set(string: str) -> bool:
+    length = len(string)
+    if 3 <= length <= 4:
+        key = hash(string, length)
+        if key <= MAX_HASH_VALUE:
+            word = wordlist[key]
+            return word and word==string
+    return False