assem

2024-12-24 01:29:28 +00:00 · 2021-01-11 01:42:22 +01:00 · 2021-01-11 01:42:22 +01:00 · b9ddde0f12
commit b9ddde0f12
parent a0ec37b35b
2 changed files with 249 additions and 77 deletions
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -2,9 +2,11 @@
 TODO
 ====

+- (github issue:) replace memory() function by some sort of declaration?
 - use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
 - optimize pointer access code @(pointer)? use a subroutine? macro?  65c02 vs 6502?
 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
+- add a compiler option to generate a symbol listing at the end
 - optimizer: detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
 - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
 - make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'
--- a/examples/cx16/assembler/assem.p8
+++ b/examples/cx16/assembler/assem.p8
@ -6,19 +6,48 @@
 %zeropage basicsafe
 %option no_sysinit

+; raw file loading of the large assembly file $c000-$ffff: 372 jiffies
+; time loading and actually processing it: 700 jiffies
+
 main {

    sub start() {
        txt.lowercase()
-        txt.print("\nAssembler.\nEmpty line to stop.\n")
+        txt.print("\n65c02 file based assembler.\n")

+        ; benchmar_raw_read()
        ; user_input()
        file_input()

        ; test_stack.test()
    }

+    sub benchmar_raw_read() {
+        ; Benchmark helper: reads the file "romdis.asm" in 256-byte chunks without
+        ; processing the data, then prints the jiffy timer value.
+        ; Takes no arguments and returns nothing; prints nothing at all if the file cannot be opened.
+        str filename = "romdis.asm"
+        ubyte[256] buffer
+
+        ; NOTE(review): 8 is presumably the disk device number - confirm against diskio.f_open
+        if diskio.f_open(8, filename) {
+            ; presumably resets the jiffy clock so the RDTIM16 value printed below is the elapsed time - verify
+            c64.SETTIM(0,0,0)
+            txt.print(filename)
+            txt.print("\ntiming raw file loading..")
+            repeat {
+                ; read the next chunk; the data itself is discarded, only the size is looked at
+                uword siz= diskio.f_read(buffer, 256)
+                ; one progress dot per read call (this includes the final read that returns 0)
+                txt.chrout('.')
+                ; a size of zero ends the loop
+                if not siz
+                    break
+            }
+            diskio.f_close()
+
+            txt.print("\ntime (jiffies): ")
+            txt.print_uw(c64.RDTIM16())
+            txt.nl()
+        }
+    }
+
+
    sub user_input() {
+        textparse.print_emit_bytes = true
+        txt.print("Empty line to stop.\n")
        repeat {
            ubyte input_length = 0
            txt.chrout('A')
@ -34,76 +63,113 @@ main {
                return
            }

-            textparse.process_line()
+            if not textparse.process_line()
+                break
        }
    }

    sub file_input() {
-        if diskio.f_open(8, "romdis.asm") {
+        textparse.print_emit_bytes = false
+        str filename = "hello.asm"
+
+        if diskio.f_open(8, filename) {
+            c64.SETTIM(0,0,0)
            uword line=0
-            repeat 5 {
+            txt.print(filename)
+            txt.print("\nassembling..")
+            repeat {
                if diskio.f_readline(textparse.input_line) {
                    line++
-                    txt.print_uw(line)
-                    txt.chrout(':')
-                    txt.print(textparse.input_line)
-                    txt.nl()
-                    textparse.process_line()
-                    if c64.READST()         ; TODO also check STOP key
+                    if not lsb(line)
+                        txt.chrout('.')
+
+                    if not textparse.process_line() {
+                        txt.print("\nerror. last line was ")
+                        txt.print_uw(line)
+                        txt.chrout(':')
+                        txt.print(textparse.word_addrs[0])
+                        txt.chrout(' ')
+                        txt.print(textparse.word_addrs[1])
+                        txt.chrout(' ')
+                        txt.print(textparse.word_addrs[2])
+                        txt.nl()
                        break
+                    }
+                    if c64.READST()
+                        break
+                    if c64.STOP2() {
+                        txt.print("?break\n")
+                        break
+                    }
                } else
                    break
            }
            diskio.f_close()
+
+            txt.print("\nlast pc: ")
+            txt.print_uwhex(textparse.program_counter, 1)
+            txt.print("\nlines: ")
+            txt.print_uw(line)
+            txt.print("\ntime (jiffies): ")
+            txt.print_uw(c64.RDTIM16())
+            txt.nl()
        }
    }
 }

 textparse {
    ; byte counts per address mode id:
-    ubyte[16] operand_size = [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2]
+    ubyte[17] operand_size = [$ff, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2]

    str input_line = "?" * 40
    uword[3] word_addrs
    uword program_counter = $4000
+    ubyte print_emit_bytes = true

-    sub process_line() {
-            string.lower(input_line)
-            preprocess_assignment_spacing()
-            split_input()
-            debug_print_words()
+    sub process_line() -> ubyte {
+        string.lower(input_line)
+        preprocess_assignment_spacing()
+        split_input()

-            if word_addrs[1] and @(word_addrs[1])=='='
-                do_assign()
-            else
-                do_label_or_instr()
+        if word_addrs[1] and @(word_addrs[1])=='='
+            return do_assign()
+        else
+            return do_label_andor_instr()
+
+        return false
    }

-    sub do_assign() {
+    sub do_assign() -> ubyte {
        ; target is in word_addrs[0], value is in word_addrs[2]   ('=' is in word_addrs[1])
        if not word_addrs[2] {
            txt.print("?syntax error\n")
-            return
+            return false
+        }
+        ubyte valid_operand=false
+        if @(word_addrs[2])=='*' {
+            cx16.r15 = program_counter
+            valid_operand = true
+        } else {
+            ubyte nlen = conv.any2uword(word_addrs[2])
+            valid_operand = nlen and @(word_addrs[2]+nlen)==0
        }

-        ubyte nlen = conv.any2uword(word_addrs[2])
-        if nlen and @(word_addrs[2]+nlen)==0 {
+        if valid_operand {
            if string.compare(word_addrs[0], "*")==0 {
                program_counter = cx16.r15
+                txt.print("\npc set to: ")
+                txt.print_uwhex(program_counter, true)
+                txt.nl()
            } else {
                set_symbol(word_addrs[0], cx16.r15)
            }
-            return
+            return true
        }
-        txt.print("?invalid operand (assign)\n")
-        txt.print("   nlen=")
-        txt.print_ub(nlen)
-        txt.print("  word=")
-        txt.print(word_addrs[2])
-        txt.nl()
+        txt.print("?invalid operand\n")
+        return false
    }

-    sub do_label_or_instr() {
+    sub do_label_andor_instr() -> ubyte {
        uword label_ptr = 0
        uword instr_ptr = 0
        uword operand_ptr = 0
@ -134,57 +200,117 @@ textparse {
                @(lastlabelchar) = 0
            if instructions.match(label_ptr) {
                txt.print("?label cannot be a mnemonic\n")
-                return
+                return false
            }
            set_symbol(label_ptr, program_counter)
        }
        if instr_ptr {
-;                txt.print("instr: ")
-;                txt.print(instr_ptr)
-;                txt.nl()
+            if @(instr_ptr)=='.'
+                return process_assembler_directive(instr_ptr, operand_ptr)

-;                if operand_ptr {
-;                    txt.print("operand: ")
-;                    txt.print(operand_ptr)
-;                    txt.nl()
-;                }
-
-            assemble_instruction(instr_ptr, operand_ptr)
+            return assemble_instruction(instr_ptr, operand_ptr)
        }
+
+        return true     ; empty line
    }

-    sub assemble_instruction(uword instr_ptr, uword operand_ptr) {
+    sub assemble_instruction(uword instr_ptr, uword operand_ptr) -> ubyte {
        uword instruction_info_ptr = instructions.match(instr_ptr)
        if instruction_info_ptr {
            ; we got a mnemonic match, now process the operand (and its value, if applicable, into cx16.r15)
            ubyte addr_mode = parse_operand(operand_ptr)
+
            if addr_mode {
-                txt.print("operand ok, addr-mode=")
-                txt.print_ub(addr_mode)
-                txt.nl()
                ubyte opcode = instructions.opcode(instruction_info_ptr, addr_mode)
                if_cc {
-                    txt.print("?invalid instruction\n")
-                } else {
-                    ubyte num_operand_bytes = operand_size[addr_mode-1]
+                    ; most likely an invalid instruction BUT could also be a branching instruction
+                    ; that needs its "absolute" operand recalculated as relative.
+                    ubyte retry = false
+                    when addr_mode {
+                        instructions.am_Abs -> {
+                            if @(instr_ptr)=='b' {
+                                addr_mode = instructions.am_Rel
+                                if not calc_relative_branch_into_r14()
+                                    return false
+                                cx16.r15 = cx16.r14
+                                retry = true
+                            }
+                        }
+                        instructions.am_Imp -> {
+                            addr_mode = instructions.am_Acc
+                            retry = true
+                        }
+                        instructions.am_Izp -> {
+                            addr_mode = instructions.am_Ind
+                            retry = true
+                        }
+                        instructions.am_Zp -> {
+                            addr_mode = instructions.am_Abs
+                            retry = true
+                        }
+                    }
+
+                    if retry
+                        opcode = instructions.opcode(instruction_info_ptr, addr_mode)
+
+                    if not opcode {
+                        txt.print("?invalid instruction\n")
+                        return false
+                    }
+                }
+
+                if addr_mode==instructions.am_Zpr {
+                    ; instructions like BBR4 $zp,$aaaa
+                    ; TODO parse second part of the operand
+;                    if not calc_relative_branch_into_r14()
+;                        return false
+;                    cx16.r15 |= (cx16.r14 << 8)
+;                    txt.print("TODO ZPR addrmode\n")
+;                    txt.print("opcode=")
+;                    txt.print_ubhex(opcode,1)
+;                    txt.print("  op1=")
+;                    txt.print_ubhex(lsb(cx16.r15),1)
+;                    txt.print("  op2=")
+;                    txt.print_ubhex(msb(cx16.r15),1)
+;                    return false
+                }
+
+                ubyte num_operand_bytes = operand_size[addr_mode]
+                if print_emit_bytes {
                    txt.chrout(' ')
                    txt.print_uwhex(program_counter, 1)
                    txt.print("   ")
-                    emit(opcode)
-                    if num_operand_bytes==1 {
-                        emit(lsb(cx16.r15))
-                    } else if num_operand_bytes == 2 {
-                        emit(lsb(cx16.r15))
-                        emit(msb(cx16.r15))
-                    }
-                    txt.nl()
                }
-                return
+                emit(opcode)
+                if num_operand_bytes==1 {
+                    emit(lsb(cx16.r15))
+                } else if num_operand_bytes == 2 {
+                    emit(lsb(cx16.r15))
+                    emit(msb(cx16.r15))
+                }
+                if print_emit_bytes
+                    txt.nl()
+                return true
            }
-            txt.print("?invalid operand (instr)\n")
-            return
+            txt.print("?invalid operand\n")
+            return false
        }
        txt.print("?invalid instruction\n")
+        return false
+    }
+
+    sub calc_relative_branch_into_r14() -> ubyte {
+        ; Converts the absolute target address in cx16.r15 into a branch offset in cx16.r14,
+        ; measured from program_counter + 2.
+        ; Returns true when the offset fits in a signed byte (-128..+127);
+        ; otherwise prints an error message and returns false.
+        cx16.r14 = cx16.r15 - program_counter - 2
+
+        ; seen as an unsigned word, the valid offsets are $0000..$007f (forward)
+        ; and $ff80..$ffff (backward); everything in between is out of reach
+        if cx16.r14 > $007f and cx16.r14 < $ff80 {
+            txt.print("?branch out of range\n")
+            return false
+        }
+        return true
+    }

    sub parse_operand(uword operand_ptr) -> ubyte {
@ -192,7 +318,7 @@ textparse {
        ; - addressing mode id as result value or 0 (am_Invalid) when error
        ; - operand numeric value in cx16.r15 (if applicable)

-        ubyte firstchr = @(operand_ptr)
+        ubyte @zp firstchr = @(operand_ptr)
        ubyte parsed_len
        when firstchr {
            0 -> return instructions.am_Imp
@ -239,7 +365,7 @@ textparse {
                }
            }
            '$', '%', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
-                ; address optionally followed by ,x or ,y
+                ; address optionally followed by ,x or ,y or ,address
                parsed_len = conv.any2uword(operand_ptr)
                if parsed_len {
                    operand_ptr += parsed_len
@ -259,6 +385,10 @@ textparse {
                            return instructions.am_ZpX
                        if str_is2(operand_ptr, ",y")
                            return instructions.am_ZpY
+                        if @(operand_ptr)==',' {
+                            ; assume BBR $zp,$aaaa or BBS $zp,$aaaa
+                            return instructions.am_Zpr
+                        }
                    }
                }
            }
@ -266,6 +396,46 @@ textparse {
        return instructions.am_Invalid
    }

+    sub process_assembler_directive(uword directive, uword operand) -> ubyte {
+        ; Processes an assembler directive (a word starting with '.').
+        ;   directive: pointer to the directive text, e.g. ".byte"
+        ;   operand:   pointer to the operand text, or 0 if there is none
+        ; Returns true when the directive was processed, false after printing an error message.
+        ;
+        ; we only recognise .byte right now:
+        ;   .byte value[,value...]     every value must fit in a single byte.
+        ; The operand must consist entirely of comma separated values: a comma that is not
+        ; followed by a value, or any trailing characters after the last value, is a syntax error
+        ; (the same strictness as do_assign applies to its operand).
+        ; Note: bytes that precede a bad value on the same line have already been emitted.
+        if string.compare(directive, ".byte")==0 {
+            if operand {
+                ubyte length
+                ; any2uword returns the number of characters parsed (0 = no number found),
+                ; the parsed value is read from cx16.r15
+                length = conv.any2uword(operand)
+                if length {
+                    if msb(cx16.r15) {
+                        txt.print("?byte value too large\n")
+                        return false
+                    }
+                    if print_emit_bytes {
+                        txt.chrout(' ')
+                        txt.print_uwhex(program_counter, 1)
+                        txt.print("   ")
+                    }
+                    emit(lsb(cx16.r15))
+                    operand += length
+                    while @(operand)==',' {
+                        operand++
+                        length = conv.any2uword(operand)
+                        if not length {
+                            ; a comma must be followed by another value
+                            if print_emit_bytes
+                                txt.nl()
+                            txt.print("?syntax error\n")
+                            return false
+                        }
+                        if msb(cx16.r15) {
+                            ; finish the listing line before reporting the error
+                            if print_emit_bytes
+                                txt.nl()
+                            txt.print("?byte value too large\n")
+                            return false
+                        }
+                        emit(lsb(cx16.r15))
+                        operand += length
+                    }
+                    if print_emit_bytes
+                        txt.nl()
+                    if @(operand)!=0 {
+                        ; unparsed characters remain after the last value
+                        txt.print("?syntax error\n")
+                        return false
+                    }
+                    return true
+                }
+            }
+        }
+        txt.print("?syntax error\n")
+        return false
+    }
+
    asmsub str_is1(uword st @R0, ubyte char @A) clobbers(Y) -> ubyte @A {
        %asm {{
            cmp  (cx16.r0)
@ -319,8 +489,10 @@ _is_2_entry
        @(program_counter) = value
        program_counter++

-        txt.print_ubhex(value, 0)
-        txt.chrout(' ')
+        if print_emit_bytes {
+            txt.print_ubhex(value, 0)
+            txt.chrout(' ')
+        }
    }

    sub set_symbol(uword symbolname_ptr, uword value) {
@ -341,13 +513,13 @@ _is_2_entry
        ; first strip the input string of extra whitespace and comments
        ubyte copying_word = false
        ubyte word_count
-        ubyte char_idx = 0
+        ubyte @zp char_idx = 0

        word_addrs[0] = 0
        word_addrs[1] = 0
        word_addrs[2] = 0

-        ubyte char
+        ubyte @zp char
        for char in input_line {
            when char {
                ' ', 9, 160 -> {
@ -389,13 +561,14 @@ _is_2_entry
    }

    sub preprocess_assignment_spacing() {
-        ; TODO optimize this... only do this if a valid instruction couldn't be parsed?
+        if not string.find(input_line, '=')
+            return
+
+        ; split the line around the '='
        str input_line2 = "?" * 40
        uword src = &input_line
        uword dest = &input_line2
-        ubyte changed = 0
-
-        ubyte cc
+        ubyte @zp cc
        for cc in input_line {
            if cc=='=' {
                @(dest) = ' '
@ -403,15 +576,12 @@ _is_2_entry
                @(dest) = '='
                dest++
                cc = ' '
-                changed++
            }
            @(dest) = cc
            dest++
        }
-        if changed {
-            @(dest)=0
-            void string.copy(input_line2, src)
-        }
+        @(dest)=0
+        void string.copy(input_line2, src)
    }
 }

@ -434,7 +604,7 @@ instructions {
    const ubyte am_Izp = 15
    const ubyte am_IaX = 16

-    ; TODO: explore (benchmark) hash based matchers
+    ; TODO: explore (benchmark) hash based matchers.   Faster (although the bulk of the time is not in the mnemonic matching)? Less memory?

    asmsub  match(uword mnemonic_ptr @AY) -> uword @AY {
        ; -- input: mnemonic_ptr in AY,   output:  pointer to instruction info structure or $0000 in AY