diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 53b91e48b..2afce30f3 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -2,9 +2,11 @@ TODO ==== +- (github issue:) replace memory() function by some sort of declaration? - use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y - optimize pointer access code @(pointer)? use a subroutine? macro? 65c02 vs 6502? - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50 +- add a compiler option to generate a symbol listing at the end - optimizer: detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine) - make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_' diff --git a/examples/cx16/assembler/assem.p8 b/examples/cx16/assembler/assem.p8 index bf92bf01d..605e3b6dc 100644 --- a/examples/cx16/assembler/assem.p8 +++ b/examples/cx16/assembler/assem.p8 @@ -6,19 +6,48 @@ %zeropage basicsafe %option no_sysinit +; raw file loading of the large assembly file $c000-$ffff: 372 jiffies +; time loading and actually processing it: 700 jiffies + main { sub start() { txt.lowercase() - txt.print("\nAssembler.\nEmpty line to stop.\n") + txt.print("\n65c02 file based assembler.\n") + ; benchmar_raw_read() ; user_input() file_input() ; test_stack.test() } + sub benchmar_raw_read() { + str filename = "romdis.asm" + ubyte[256] buffer + + if diskio.f_open(8, filename) { + c64.SETTIM(0,0,0) + txt.print(filename) + txt.print("\ntiming raw file loading..") + repeat { + uword siz= diskio.f_read(buffer, 256) + txt.chrout('.') + if not siz + break + } + diskio.f_close() + + txt.print("\ntime (jiffies): ") + txt.print_uw(c64.RDTIM16()) + txt.nl() + } + } + + sub user_input() { + textparse.print_emit_bytes = true + txt.print("Empty line to stop.\n") repeat { ubyte input_length = 0 txt.chrout('A') @@ -34,76 +63,113 @@ main { return } - textparse.process_line() + if not textparse.process_line() + break } } sub file_input() { - if diskio.f_open(8, "romdis.asm") { + textparse.print_emit_bytes = false + str filename = "hello.asm" + + if diskio.f_open(8, filename) { + c64.SETTIM(0,0,0) uword line=0 - repeat 5 { + txt.print(filename) + txt.print("\nassembling..") + repeat { if diskio.f_readline(textparse.input_line) { line++ - txt.print_uw(line) - txt.chrout(':') - txt.print(textparse.input_line) - txt.nl() - textparse.process_line() - if c64.READST() ; TODO also check STOP key + if not lsb(line) + txt.chrout('.') + + if not textparse.process_line() { + txt.print("\nerror. last line was ") + txt.print_uw(line) + txt.chrout(':') + txt.print(textparse.word_addrs[0]) + txt.chrout(' ') + txt.print(textparse.word_addrs[1]) + txt.chrout(' ') + txt.print(textparse.word_addrs[2]) + txt.nl() break + } + if c64.READST() + break + if c64.STOP2() { + txt.print("?break\n") + break + } } else break } diskio.f_close() + + txt.print("\nlast pc: ") + txt.print_uwhex(textparse.program_counter, 1) + txt.print("\nlines: ") + txt.print_uw(line) + txt.print("\ntime (jiffies): ") + txt.print_uw(c64.RDTIM16()) + txt.nl() } } } textparse { ; byte counts per address mode id: - ubyte[16] operand_size = [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2] + ubyte[17] operand_size = [$ff, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2] str input_line = "?" * 40 uword[3] word_addrs uword program_counter = $4000 + ubyte print_emit_bytes = true - sub process_line() { - string.lower(input_line) - preprocess_assignment_spacing() - split_input() - debug_print_words() + sub process_line() -> ubyte { + string.lower(input_line) + preprocess_assignment_spacing() + split_input() - if word_addrs[1] and @(word_addrs[1])=='=' - do_assign() - else - do_label_or_instr() + if word_addrs[1] and @(word_addrs[1])=='=' + return do_assign() + else + return do_label_andor_instr() + + return false } - sub do_assign() { + sub do_assign() -> ubyte { ; target is in word_addrs[0], value is in word_addrs[2] ('=' is in word_addrs[1]) if not word_addrs[2] { txt.print("?syntax error\n") - return + return false + } + ubyte valid_operand=false + if @(word_addrs[2])=='*' { + cx16.r15 = program_counter + valid_operand = true + } else { + ubyte nlen = conv.any2uword(word_addrs[2]) + valid_operand = nlen and @(word_addrs[2]+nlen)==0 } - ubyte nlen = conv.any2uword(word_addrs[2]) - if nlen and @(word_addrs[2]+nlen)==0 { + if valid_operand { if string.compare(word_addrs[0], "*")==0 { program_counter = cx16.r15 + txt.print("\npc set to: ") + txt.print_uwhex(program_counter, true) + txt.nl() } else { set_symbol(word_addrs[0], cx16.r15) } - return + return true } - txt.print("?invalid operand (assign)\n") - txt.print(" nlen=") - txt.print_ub(nlen) - txt.print(" word=") - txt.print(word_addrs[2]) - txt.nl() + txt.print("?invalid operand\n") + return false } - sub do_label_or_instr() { + sub do_label_andor_instr() -> ubyte { uword label_ptr = 0 uword instr_ptr = 0 uword operand_ptr = 0 @@ -134,57 +200,117 @@ textparse { @(lastlabelchar) = 0 if instructions.match(label_ptr) { txt.print("?label cannot be a mnemonic\n") - return + return false } set_symbol(label_ptr, program_counter) } if instr_ptr { -; txt.print("instr: ") -; txt.print(instr_ptr) -; txt.nl() + if @(instr_ptr)=='.' + return process_assembler_directive(instr_ptr, operand_ptr) -; if operand_ptr { -; txt.print("operand: ") -; txt.print(operand_ptr) -; txt.nl() -; } - - assemble_instruction(instr_ptr, operand_ptr) + return assemble_instruction(instr_ptr, operand_ptr) } + + return true ; empty line } - sub assemble_instruction(uword instr_ptr, uword operand_ptr) { + sub assemble_instruction(uword instr_ptr, uword operand_ptr) -> ubyte { uword instruction_info_ptr = instructions.match(instr_ptr) if instruction_info_ptr { ; we got a mnemonic match, now process the operand (and its value, if applicable, into cx16.r15) ubyte addr_mode = parse_operand(operand_ptr) + if addr_mode { - txt.print("operand ok, addr-mode=") - txt.print_ub(addr_mode) - txt.nl() ubyte opcode = instructions.opcode(instruction_info_ptr, addr_mode) if_cc { - txt.print("?invalid instruction\n") - } else { - ubyte num_operand_bytes = operand_size[addr_mode-1] + ; most likely an invalid instruction BUT could also be a branchin instruction + ; that needs its "absolute" operand recalculated as relative. + ubyte retry = false + when addr_mode { + instructions.am_Abs -> { + if @(instr_ptr)=='b' { + addr_mode = instructions.am_Rel + if not calc_relative_branch_into_r14() + return false + cx16.r15 = cx16.r14 + retry = true + } + } + instructions.am_Imp -> { + addr_mode = instructions.am_Acc + retry = true + } + instructions.am_Izp -> { + addr_mode = instructions.am_Ind + retry = true + } + instructions.am_Zp -> { + addr_mode = instructions.am_Abs + retry = true + } + } + + if retry + opcode = instructions.opcode(instruction_info_ptr, addr_mode) + + if not opcode { + txt.print("?invalid instruction\n") + return false + } + } + + if addr_mode==instructions.am_Zpr { + ; instructions like BBR4 $zp,$aaaa + ; TODO parse second part of the operand +; if not calc_relative_branch_into_r14() +; return false +; cx16.r15 |= (cx16.r14 << 8) +; txt.print("TODO ZPR addrmode\n") +; txt.print("opcode=") +; txt.print_ubhex(opcode,1) +; txt.print(" op1=") +; txt.print_ubhex(lsb(cx16.r15),1) +; txt.print(" op2=") +; txt.print_ubhex(msb(cx16.r15),1) +; return false + } + + ubyte num_operand_bytes = operand_size[addr_mode] + if print_emit_bytes { txt.chrout(' ') txt.print_uwhex(program_counter, 1) txt.print(" ") - emit(opcode) - if num_operand_bytes==1 { - emit(lsb(cx16.r15)) - } else if num_operand_bytes == 2 { - emit(lsb(cx16.r15)) - emit(msb(cx16.r15)) - } - txt.nl() } - return + emit(opcode) + if num_operand_bytes==1 { + emit(lsb(cx16.r15)) + } else if num_operand_bytes == 2 { + emit(lsb(cx16.r15)) + emit(msb(cx16.r15)) + } + if print_emit_bytes + txt.nl() + return true } - txt.print("?invalid operand (instr)\n") - return + txt.print("?invalid operand\n") + return false } txt.print("?invalid instruction\n") + return false + } + + sub calc_relative_branch_into_r14() -> ubyte { + cx16.r14 = cx16.r15 - program_counter - 2 + if msb(cx16.r14) { + if cx16.r14 < $ff80 { + txt.print("?branch out of range\n") + return false + } + } else if cx16.r14 > $007f { + txt.print("?branch out of range\n") + return false + } + return true } sub parse_operand(uword operand_ptr) -> ubyte { @@ -192,7 +318,7 @@ textparse { ; - addressing mode id as result value or 0 (am_Invalid) when error ; - operand numeric value in cx16.r15 (if applicable) - ubyte firstchr = @(operand_ptr) + ubyte @zp firstchr = @(operand_ptr) ubyte parsed_len when firstchr { 0 -> return instructions.am_Imp @@ -239,7 +365,7 @@ textparse { } } '$', '%', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { - ; address optionally followed by ,x or ,y + ; address optionally followed by ,x or ,y or ,address parsed_len = conv.any2uword(operand_ptr) if parsed_len { operand_ptr += parsed_len @@ -259,6 +385,10 @@ textparse { return instructions.am_ZpX if str_is2(operand_ptr, ",y") return instructions.am_ZpY + if @(operand_ptr)==',' { + ; assume BBR $zp,$aaaa or BBS $zp,$aaaa + return instructions.am_Zpr + } } } } @@ -266,6 +396,46 @@ textparse { return instructions.am_Invalid } + sub process_assembler_directive(uword directive, uword operand) -> ubyte { + ; we only recognise .byte right now + if string.compare(directive, ".byte")==0 { + if operand { + ubyte length + length = conv.any2uword(operand) + if length { + if msb(cx16.r15) { + txt.print("?byte value too large\n") + return false + } + if print_emit_bytes { + txt.chrout(' ') + txt.print_uwhex(program_counter, 1) + txt.print(" ") + } + emit(lsb(cx16.r15)) + operand += length + while @(operand)==',' { + operand++ + length = conv.any2uword(operand) + if not length + break + if msb(cx16.r15) { + txt.print("?byte value too large\n") + return false + } + emit(lsb(cx16.r15)) + operand += length + } + if print_emit_bytes + txt.nl() + return true + } + } + } + txt.print("?syntax error\n") + return false + } + asmsub str_is1(uword st @R0, ubyte char @A) clobbers(Y) -> ubyte @A { %asm {{ cmp (cx16.r0) @@ -319,8 +489,10 @@ _is_2_entry @(program_counter) = value program_counter++ - txt.print_ubhex(value, 0) - txt.chrout(' ') + if print_emit_bytes { + txt.print_ubhex(value, 0) + txt.chrout(' ') + } } sub set_symbol(uword symbolname_ptr, uword value) { @@ -341,13 +513,13 @@ _is_2_entry ; first strip the input string of extra whitespace and comments ubyte copying_word = false ubyte word_count - ubyte char_idx = 0 + ubyte @zp char_idx = 0 word_addrs[0] = 0 word_addrs[1] = 0 word_addrs[2] = 0 - ubyte char + ubyte @zp char for char in input_line { when char { ' ', 9, 160 -> { @@ -389,13 +561,14 @@ _is_2_entry } sub preprocess_assignment_spacing() { - ; TODO optimize this... only do this if a valid instruction couldn't be parsed? + if not string.find(input_line, '=') + return + + ; split the line around the '=' str input_line2 = "?" * 40 uword src = &input_line uword dest = &input_line2 - ubyte changed = 0 - - ubyte cc + ubyte @zp cc for cc in input_line { if cc=='=' { @(dest) = ' ' @@ -403,15 +576,12 @@ _is_2_entry @(dest) = '=' dest++ cc = ' ' - changed++ } @(dest) = cc dest++ } - if changed { - @(dest)=0 - void string.copy(input_line2, src) - } + @(dest)=0 + void string.copy(input_line2, src) } } @@ -434,7 +604,7 @@ instructions { const ubyte am_Izp = 15 const ubyte am_IaX = 16 - ; TODO: explore (benchmark) hash based matchers + ; TODO: explore (benchmark) hash based matchers. Faster (although the bulk of the time is not in the mnemonic matching)? Less memory? asmsub match(uword mnemonic_ptr @AY) -> uword @AY { ; -- input: mnemonic_ptr in AY, output: pointer to instruction info structure or $0000 in AY