prog8/examples/cx16/assembler/assem.p8

%target cx16
%import test_stack
%import textio
%import string
%zeropage basicsafe
%option no_sysinit


main {
    ; Program entry block of the interactive assembler example.

    sub start() {
        ; -- calls txt.lowercase(), prints the banner and then runs the interactive
        ;    input loop; textparse.user_input() returns when an empty line is entered.
        txt.lowercase()
        txt.print("\nAssembler.\nEmpty line to stop.\n")

        textparse.user_input()

        ; test_stack.test()
    }

}

textparse {
    ; Per-addressing-mode tables. Entry i describes addressing mode number i+1
    ; (the instructions.am_* constants start at 1; am_Invalid=0 has no entry).
    ; operand_size is the number of operand bytes that follow the opcode byte.
    ; Zpr (zeropage address + relative branch offset) and IaX (absolute indexed
    ; indirect, e.g. jmp ($a000,x)) both take TWO operand bytes on the 65C02.
    str[16] addr_modes =     ["Imp", "Acc", "Imm", "Zp", "ZpX", "ZpY", "Rel", "Abs", "AbsX", "AbsY", "Ind", "IzX", "IzY", "Zpr", "Izp", "IaX" ]
    ubyte[16] operand_size = [0,     0,     1,     1,    1,     1,     1,     2,     2,      2,      2,     1,     1,     2,     1,     2]

    ; Line buffer: user_input() stores a space at index 0 and reads the typed text
    ; starting at index 1; split_input() later overwrites word separators with 0.
    str input_line = "?" * 40
    ; Start addresses (inside input_line) of up to three words found by split_input();
    ; an entry of 0 means that word is absent.
    uword[3] word_addrs
    ; Address where emit() stores the next byte; can be changed with "* = value".
    uword program_counter = $4000

    sub user_input() {
        ; -- Interactive loop: show a prompt with the current program counter, read one
        ;    line, split it into words and process it either as an assignment
        ;    (second word is '=') or as a label/instruction line.
        ;    Returns when an empty line is entered.
        repeat {
            txt.chrout('A')
            txt.print_uwhex(program_counter, 1)
            txt.print(": ")

            ; simulate user always having at least one space at the start
            input_line[0] = ' '
            ubyte input_length = txt.input_chars(&input_line+1)
            txt.chrout('\n')

            if input_length == 0 {
                txt.print("exit\n")
                return
            }

            preprocess_assignment_spacing()
            split_input()
            ; debug_print_words()

            ; an assignment is recognised by a second word that starts with '='
            ubyte is_assignment = false
            if word_addrs[1] {
                if @(word_addrs[1])=='='
                    is_assignment = true
            }

            if is_assignment
                do_assign()
            else
                do_label_or_instr()
        }
    }

    sub do_assign() {
        ; -- Handle  "target = value".  The words come from split_input():
        ;    word 0 is the target, word 1 is the '=', word 2 is the value text.
        ;    Target "*" sets the program counter, anything else is passed to set_symbol().
        uword target_ptr = word_addrs[0]
        uword value_ptr = word_addrs[2]

        if value_ptr == 0 {
            txt.print("?syntax error\n")
            return
        }

        uword value = conv.any2uword(value_ptr)
        if string.compare(target_ptr, "*") != 0 {
            set_symbol(target_ptr, value)
            return
        }
        program_counter = value
    }

    sub do_label_or_instr() {
        ; -- Interpret the split words as  [label] [mnemonic] [operand].
        ;    Three words: label, mnemonic, operand.
        ;    Two words:   mnemonic + operand if the line starts with whitespace, else label + mnemonic.
        ;    One word:    mnemonic if the line starts with whitespace, else label.
        ;    A label is registered via set_symbol() with the current program counter,
        ;    a mnemonic is handed to assemble_instruction().
        uword label_ptr = 0
        uword instr_ptr = 0
        uword operand_ptr = 0
        ubyte first_char = input_line[0]
        ubyte indented = first_char==' ' or first_char==9 or first_char==160

        if word_addrs[2] {
            label_ptr = word_addrs[0]
            instr_ptr = word_addrs[1]
            operand_ptr = word_addrs[2]
        } else if word_addrs[1] {
            if indented {
                instr_ptr = word_addrs[0]
                operand_ptr = word_addrs[1]
            } else {
                label_ptr = word_addrs[0]
                instr_ptr = word_addrs[1]
            }
        } else if word_addrs[0] {
            if indented
                instr_ptr = word_addrs[0]
            else
                label_ptr = word_addrs[0]
        }

        ; the operand text is normalised before anything else looks at it
        if operand_ptr
            lowercase(operand_ptr)

        if label_ptr {
            ; strip one optional trailing ':' from the label
            uword label_end = label_ptr + string.length(label_ptr)-1
            if @(label_end) == ':'
                @(label_end) = 0
            if instructions.match(label_ptr) {
                txt.print("?label cannot be a mnemonic\n")
                return
            }
            set_symbol(label_ptr, program_counter)
        }

        if instr_ptr
            assemble_instruction(instr_ptr, operand_ptr)
    }

    sub assemble_instruction(uword instr_ptr, uword operand_ptr) {
        ; -- Assemble one instruction at program_counter and print the emitted bytes.
        ;    instr_ptr:   pointer to the mnemonic text
        ;    operand_ptr: pointer to the operand text, or 0 when there is no operand
        ;    Prints an error message (always terminated by a newline) and emits nothing
        ;    when the mnemonic or the operand is not accepted.
        uword instruction_info_ptr = instructions.match(instr_ptr)
        if not instruction_info_ptr {
            txt.print("?instruction error\n")
            return
        }

        ubyte addr_mode = instructions.determine_addrmode(operand_ptr)
        if addr_mode == instructions.am_Invalid {
            ; Reject early: am_Invalid is 0, so operand_size[addr_mode-1] below would
            ; index the table at 255 if instructions.opcode() ever accepted it.
            txt.print("?invalid operand\n")
            return
        }

        ; instructions.opcode() reports validity in the carry flag, so if_cc must
        ; directly follow the call: carry clear means no opcode for this addressing mode.
        ubyte opcode = instructions.opcode(instruction_info_ptr, addr_mode)
        if_cc {
            txt.print("?invalid operand\n")
        } else {
            ; the tables are indexed by addressing mode number minus one (modes start at 1)
            ubyte num_operand_bytes = operand_size[addr_mode-1]
            if not parse_operand_value(operand_ptr, addr_mode) {
                txt.print("?invalid operand\n")
                return
            }
;                txt.print("(debug:) addr.mode: ")
;                txt.print_ub(addr_mode)
;                txt.chrout('\n')
            txt.chrout(' ')
            txt.print_uwhex(program_counter, 1)
            txt.print("   ")
            emit(opcode)
            ; the operand value was left in cx16.r0 by parse_operand_value(); low byte first
            if num_operand_bytes==1 {
                emit(lsb(cx16.r0))
            } else if num_operand_bytes == 2 {
                emit(lsb(cx16.r0))
                emit(msb(cx16.r0))
            }
            txt.chrout('\n')
        }
    }

    sub emit(ubyte value) {
        ; -- store one byte at the program counter, advance the program counter,
        ;    and echo the byte in hex followed by a space.
        @(program_counter) = value
        program_counter += 1

        txt.print_ubhex(value, false)
        txt.chrout(' ')
    }

    sub set_symbol(uword symbolname_ptr, uword value) {
        ; -- report a symbol definition as  "symbol: <name>=<value in hex>".
        ;    NOTE(review): this only prints; nothing is stored here, so symbols
        ;    cannot be looked up later by the code in this file.
        txt.print("symbol: ")
        txt.print(symbolname_ptr)
        txt.chrout('=')
        txt.print_uwhex(value, true)
        txt.chrout('\n')
    }

    sub lowercase(uword string) {
        ; -- clear bit 7 of every character of the 0-terminated string, in place
        ;    (this masking is what the rest of the file calls "lowercase").
        ; TODO optimize in asm
        repeat {
            ubyte char = @(string)
            if char == 0
                return
            @(string) = char & 127
            string++
        }
    }

    sub dummy(uword operand_ptr) -> uword {
        ; -- returns a random word plus operand_ptr; not called anywhere in this file.
        uword random_base = rndw()
        return random_base + operand_ptr
    }

    sub parse_operand_value(uword operand_ptr, ubyte addr_mode) -> ubyte {
        ; -- Parse the numeric part of the operand text for the given addressing mode.
        ;    Returns true on success with the value in cx16.r0, or false when the
        ;    addressing mode is not one of the known modes.
        ;    Modes without an operand (Imp, Acc) succeed without touching cx16.r0.
        ; TODO number parsing error detection
        ; TODO parse the ,x / ,y index suffixes
        ; TODO Zpr: parse the label and compute the relative offset
        ; TODO Rel: calculate relative offset to current program counter

        when addr_mode {
            ; nop / asl a  (no operands)
            instructions.am_Imp, instructions.am_Acc -> return true

            ; number follows a one-character prefix ('#' or '('):
            ;   lda #$12 / jmp ($fffc) / lda ($02,x) / lda ($02),y / jmp ($a000,x) / lda ($02)
            instructions.am_Imm, instructions.am_Ind, instructions.am_IzX, instructions.am_IzY, instructions.am_IaX, instructions.am_Izp -> return parse_number_at(operand_ptr+1)

            ; number starts at the first character:
            ;   lda $02 / brr0 $12,label / lda $02,x / lda $02,y / bcc $c000 / jmp $1234 / sta $3000,x / sta $3000,y
            instructions.am_Zp, instructions.am_Zpr, instructions.am_ZpX, instructions.am_ZpY, instructions.am_Rel, instructions.am_Abs, instructions.am_AbsX, instructions.am_AbsY -> return parse_number_at(operand_ptr)
        }

        return false

        sub parse_number_at(uword text_ptr) -> ubyte {
            ; convert the text to a number in cx16.r0 and print a debug trace of it
            cx16.r0 = conv.any2uword(text_ptr)
            txt.print("(debug:) operand=")
            txt.print(text_ptr)
            txt.print("  -> value: ")
            txt.print_uwhex(cx16.r0, true)
            txt.chrout('\n')
            return true
        }
    }

    sub split_input() {
        ; -- Split input_line in place into at most three whitespace-separated words.
        ;    word_addrs[0..2] receive the start address of each word (0 = not present).
        ;    Every word is 0-terminated inside input_line; scanning stops at a ';'
        ;    comment, at the end of the string, or where a fourth word would start.
        ubyte in_word = false
        ubyte word_count = 0
        ubyte char_idx = 0

        word_addrs[0] = 0
        word_addrs[1] = 0
        word_addrs[2] = 0

        ubyte char
        for char in input_line {
            ; terminate line on comment char or end-of-string
            if char==';' or char==0
                break

            if char==' ' or char==9 or char==160 {
                ; whitespace ends the current word, if any
                if in_word
                    input_line[char_idx] = 0
                in_word = false
            } else {
                if not in_word {
                    ; first character of a new word
                    if word_count==3
                        break
                    word_addrs[word_count] = &input_line + char_idx
                    word_count++
                    in_word = true
                }
            }
            char_idx++
        }

        ; make sure the last word ends at the position where scanning stopped
        char = input_line[char_idx]
        if char==' ' or char==9 or char==160 or char==';'
            input_line[char_idx] = 0
    }

    sub debug_print_words() {        ; TODO remove
        ; -- debugging aid: print every entry of word_addrs as a string between [ ]
        txt.print("(debug:) words: ")
        ubyte word_ix
        for word_ix in 0 to len(word_addrs)-1 {
            uword word_ptr = word_addrs[word_ix]
            txt.chrout('[')
            txt.print(word_ptr)
            txt.print("] ")
        }
        txt.chrout('\n')
    }

    sub preprocess_assignment_spacing() {
        ; -- Rewrite input_line so that every '=' is surrounded by spaces ("a=1" -> "a = 1"),
        ;    which lets split_input() see the '=' as a separate word.
        ;    input_line is only modified when at least one '=' was found.
        ;    Each '=' grows the text by two characters, so the output is capped at the
        ;    buffer capacity; text that no longer fits is dropped instead of being
        ;    written past the end of input_line2 / input_line.
        ; TODO optimize this... only do this if a valid instruction couldn't be parsed?
        const ubyte max_chars = 40      ; capacity of input_line and input_line2, excluding the 0 terminator
        str input_line2 = "?" * 40
        uword src = &input_line
        uword dest = &input_line2
        ubyte out_len = 0
        ubyte changed = 0

        ubyte cc
        for cc in input_line {
            ; never process stale bytes behind the end of the current text
            if cc==0
                break
            if cc=='=' {
                ; needs room for " = " (three characters)
                if out_len > max_chars-3
                    break
                @(dest) = ' '
                dest++
                @(dest) = '='
                dest++
                out_len += 2
                cc = ' '
                changed++
            } else {
                if out_len >= max_chars
                    break
            }
            @(dest) = cc
            dest++
            out_len++
        }
        if changed {
            @(dest)=0
            string.copy(input_line2, src)
        }
    }
}

instructions {
    ; Addressing mode numbers.
    ; textparse indexes its addr_modes / operand_size tables with (mode - 1), and
    ; opcode() uses the mode number as byte offset into a multi-mode instruction info record.
    ; The example syntax below is taken from textparse.parse_operand_value().
    const ubyte am_Invalid = 0
    const ubyte am_Imp = 1      ; nop
    const ubyte am_Acc = 2      ; asl a
    const ubyte am_Imm = 3      ; lda #$12
    const ubyte am_Zp = 4       ; lda $02
    const ubyte am_ZpX = 5      ; lda $02,x
    const ubyte am_ZpY = 6      ; lda $02,y
    const ubyte am_Rel = 7      ; bcc $c000
    const ubyte am_Abs = 8      ; jmp $1234
    const ubyte am_AbsX = 9     ; sta $3000,x
    const ubyte am_AbsY = 10    ; sta $3000,y
    const ubyte am_Ind = 11     ; jmp ($fffc)
    const ubyte am_IzX = 12     ; lda ($02,x)
    const ubyte am_IzY = 13     ; lda ($02),y
    const ubyte am_Zpr = 14     ; brr0 $12,label
    const ubyte am_Izp = 15     ; lda ($02)
    const ubyte am_IaX = 16     ; jmp ($a000,x)

    sub determine_addrmode(uword operand_ptr) -> ubyte {
        ; -- Guess the addressing mode from the first character(s) of the operand text.
        ;    Returns one of the am_* constants; am_Invalid if the operand is not understood.
        ;    A missing (0) or empty operand means am_Imp.
        ;    Only Imp, Acc, Imm, Ind and Abs are recognised so far.

        if operand_ptr == 0
            return am_Imp

        when @(operand_ptr) {
            0 -> return am_Imp
            'a' -> {
                ; a lone "a" selects the accumulator.
                ; anything longer is some expression: zp or absolute depends
                ; on the value of the symbol referenced
                if @(operand_ptr+1)
                    return am_Invalid       ; TODO
                return am_Acc
            }
            '#' -> {
                ; immediate, needs at least one character after the '#'
                if @(operand_ptr+1) == 0
                    return am_Invalid
                return am_Imm
            }
            '(' -> {
                ; some indirect TODO
                ; can be (zp), (zp,x), (zp),y, (abs), (abs,x)
                if @(operand_ptr+1) == 0
                    return am_Invalid
                return am_Ind
            }
            '$', '%' -> {
                ; hex or bin address TODO
                ; can be followed by ,x or ,y
                if @(operand_ptr+1) == 0
                    return am_Invalid
                return am_Abs
            }
            '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
                ; absolute or indexed address TODO
                ; can be followed by ,x or ,y
                return am_Abs
            }
        }

        return am_Invalid
    }

    ; TODO: explore (benchmark) hash based matchers

    asmsub  match(uword mnemonic_ptr @AY) -> uword @AY {
        ; -- input: mnemonic_ptr in AY,   output:  pointer to instruction info structure or $0000 in AY
        ;    Reads the first five bytes at mnemonic_ptr, each masked with $7f, and hands them
        ;    to get_opcode_info: letter 1 in A, letter 2 in X, letter 3 in Y, letter 4 in
        ;    cx16.r4 (low byte), letter 5 in cx16.r5 (low byte).
        ;    X is saved on entry and restored before returning.
        ;    The pointer stays behind in P8ZP_SCRATCH_W1; opcode() reads it again for its "brk" check.
        ;    NOTE(review): five bytes are always read, also when the text is shorter than that.
        ;    NOTE(review): get_opcode_info is not defined in this file - presumably it comes
        ;    from the included opcodes.asm; confirm its register contract there.
        %asm {{
            phx
            sta  P8ZP_SCRATCH_W1
            sty  P8ZP_SCRATCH_W1+1
            ldy  #0
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f   ; lowercase
            pha
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f   ; lowercase
            pha
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f   ; lowercase
            pha
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f   ; lowercase
            sta  cx16.r4                ; fourth letter in R4 (only exists for the few 4-letter mnemonics)
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f   ; lowercase
            sta  cx16.r5                ; fifth letter in R5 (should always be zero or whitespace for a valid mnemonic)
            ; pop the three letters in reverse order: third -> Y, second -> X, first -> A
            pla
            tay
            pla
            tax
            pla
            jsr  get_opcode_info
            plx
            rts
        }}
    }

    asmsub  opcode(uword instr_info_ptr @AY, ubyte addr_mode @X) clobbers(X) -> ubyte @A, ubyte @Pc {
        ; -- input: instruction info struct ptr @AY,  desired addr_mode @X
        ;    output: opcode @A,   valid @carrybit
        ;    Carry set = opcode in A is valid; carry clear = not found (A is 0).
        ;    Layout of the info record as read by this routine:
        ;      byte 0 non-zero: single addressing mode; byte 1 = that mode, byte 2 = the opcode
        ;      byte 0 zero:     multiple modes; the byte at offset addr_mode is the opcode,
        ;                       where $00 means "no opcode for this mode" (except for brk)
        ;    Only the high byte of the pointer is tested for null.
        ;    Uses P8ZP_SCRATCH_W2 and cx16.r15 as working storage.
        ;    NOTE(review): the "brk" check reads the mnemonic through P8ZP_SCRATCH_W1, which
        ;    this routine does not set; it is left there by match(). Confirm nothing that runs
        ;    between the match() and opcode() calls overwrites that scratch word.
        %asm {{
            cpy  #0
            beq  _not_found
            sta  P8ZP_SCRATCH_W2
            sty  P8ZP_SCRATCH_W2+1
            stx  cx16.r15

            ; debug result address
            ;sec
            ;jsr  txt.print_uwhex
            ;lda  #13
            ;jsr  c64.CHROUT

            ldy  #0
            lda  (P8ZP_SCRATCH_W2),y
            beq  _multi_addrmodes
            iny
            lda  (P8ZP_SCRATCH_W2),y
            cmp  cx16.r15               ; check single possible addr.mode
            bne  _not_found
            iny
            lda  (P8ZP_SCRATCH_W2),y    ; get opcode
            sec
            rts

_not_found  lda  #0
            clc
            rts

_multi_addrmodes
            ldy  cx16.r15
            lda  (P8ZP_SCRATCH_W2),y    ; check opcode for addr.mode
            bne  _valid
            ; opcode $00 usually means 'invalid' but for "brk" it is actually valid so check for "brk"
            ; (only the first three letters of the mnemonic are compared)
            ldy  #0
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f       ; lowercase
            cmp  #'b'
            bne  _not_found
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f       ; lowercase
            cmp  #'r'
            bne  _not_found
            iny
            lda  (P8ZP_SCRATCH_W1),y
            and  #$7f       ; lowercase
            cmp  #'k'
            bne  _not_found
            lda  #0
_valid      sec
            rts
        }}
    }

    %asminclude "opcodes.asm", ""
}