This commit is contained in:
Irmen de Jong 2021-01-11 01:42:22 +01:00
parent a0ec37b35b
commit b9ddde0f12
2 changed files with 249 additions and 77 deletions

View File

@ -2,9 +2,11 @@
TODO TODO
==== ====
- (github issue:) replace memory() function by some sort of declaration?
- use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y - use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
- optimize pointer access code @(pointer)? use a subroutine? macro? 65c02 vs 6502? - optimize pointer access code @(pointer)? use a subroutine? macro? 65c02 vs 6502?
- can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
- add a compiler option to generate a symbol listing at the end
- optimizer: detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation - optimizer: detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
- hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine) - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)
- make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_' - make it possible to use cpu opcodes such as 'nop' as variable names by prefixing all asm vars with something such as '_'

View File

@ -6,19 +6,48 @@
%zeropage basicsafe %zeropage basicsafe
%option no_sysinit %option no_sysinit
; raw file loading of the large assembly file $c000-$ffff: 372 jiffies
; time loading and actually processing it: 700 jiffies
main { main {
sub start() { sub start() {
txt.lowercase() txt.lowercase()
txt.print("\nAssembler.\nEmpty line to stop.\n") txt.print("\n65c02 file based assembler.\n")
; benchmar_raw_read()
; user_input() ; user_input()
file_input() file_input()
; test_stack.test() ; test_stack.test()
} }
sub benchmar_raw_read() {
    ; Benchmark helper: pulls "romdis.asm" through diskio.f_read in 256-byte
    ; requests without processing the data, printing one '.' per request,
    ; and afterwards prints the value of c64.RDTIM16() as the elapsed time.
    ; If diskio.f_open fails, nothing is printed and nothing is timed.
    str filename = "romdis.asm"
    ubyte[256] buffer

    if not diskio.f_open(8, filename)
        return

    c64.SETTIM(0,0,0)       ; restart the clock so the reading printed below covers only this run
    txt.print(filename)
    txt.print("\ntiming raw file loading..")
    repeat {
        uword bytes_read = diskio.f_read(buffer, 256)
        txt.chrout('.')
        ; a request that yields 0 ends the loop
        if bytes_read==0
            break
    }
    diskio.f_close()

    txt.print("\ntime (jiffies): ")
    txt.print_uw(c64.RDTIM16())
    txt.nl()
}
sub user_input() { sub user_input() {
textparse.print_emit_bytes = true
txt.print("Empty line to stop.\n")
repeat { repeat {
ubyte input_length = 0 ubyte input_length = 0
txt.chrout('A') txt.chrout('A')
@ -34,76 +63,113 @@ main {
return return
} }
textparse.process_line() if not textparse.process_line()
break
} }
} }
sub file_input() { sub file_input() {
if diskio.f_open(8, "romdis.asm") { textparse.print_emit_bytes = false
str filename = "hello.asm"
if diskio.f_open(8, filename) {
c64.SETTIM(0,0,0)
uword line=0 uword line=0
repeat 5 { txt.print(filename)
txt.print("\nassembling..")
repeat {
if diskio.f_readline(textparse.input_line) { if diskio.f_readline(textparse.input_line) {
line++ line++
if not lsb(line)
txt.chrout('.')
if not textparse.process_line() {
txt.print("\nerror. last line was ")
txt.print_uw(line) txt.print_uw(line)
txt.chrout(':') txt.chrout(':')
txt.print(textparse.input_line) txt.print(textparse.word_addrs[0])
txt.chrout(' ')
txt.print(textparse.word_addrs[1])
txt.chrout(' ')
txt.print(textparse.word_addrs[2])
txt.nl() txt.nl()
textparse.process_line()
if c64.READST() ; TODO also check STOP key
break break
}
if c64.READST()
break
if c64.STOP2() {
txt.print("?break\n")
break
}
} else } else
break break
} }
diskio.f_close() diskio.f_close()
txt.print("\nlast pc: ")
txt.print_uwhex(textparse.program_counter, 1)
txt.print("\nlines: ")
txt.print_uw(line)
txt.print("\ntime (jiffies): ")
txt.print_uw(c64.RDTIM16())
txt.nl()
} }
} }
} }
textparse { textparse {
; byte counts per address mode id: ; byte counts per address mode id:
ubyte[16] operand_size = [0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2] ubyte[17] operand_size = [$ff, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 1, 1, 2, 1, 2]
str input_line = "?" * 40 str input_line = "?" * 40
uword[3] word_addrs uword[3] word_addrs
uword program_counter = $4000 uword program_counter = $4000
ubyte print_emit_bytes = true
sub process_line() { sub process_line() -> ubyte {
string.lower(input_line) string.lower(input_line)
preprocess_assignment_spacing() preprocess_assignment_spacing()
split_input() split_input()
debug_print_words()
if word_addrs[1] and @(word_addrs[1])=='=' if word_addrs[1] and @(word_addrs[1])=='='
do_assign() return do_assign()
else else
do_label_or_instr() return do_label_andor_instr()
return false
} }
sub do_assign() { sub do_assign() -> ubyte {
; target is in word_addrs[0], value is in word_addrs[2] ('=' is in word_addrs[1]) ; target is in word_addrs[0], value is in word_addrs[2] ('=' is in word_addrs[1])
if not word_addrs[2] { if not word_addrs[2] {
txt.print("?syntax error\n") txt.print("?syntax error\n")
return return false
}
ubyte valid_operand=false
if @(word_addrs[2])=='*' {
cx16.r15 = program_counter
valid_operand = true
} else {
ubyte nlen = conv.any2uword(word_addrs[2])
valid_operand = nlen and @(word_addrs[2]+nlen)==0
} }
ubyte nlen = conv.any2uword(word_addrs[2]) if valid_operand {
if nlen and @(word_addrs[2]+nlen)==0 {
if string.compare(word_addrs[0], "*")==0 { if string.compare(word_addrs[0], "*")==0 {
program_counter = cx16.r15 program_counter = cx16.r15
txt.print("\npc set to: ")
txt.print_uwhex(program_counter, true)
txt.nl()
} else { } else {
set_symbol(word_addrs[0], cx16.r15) set_symbol(word_addrs[0], cx16.r15)
} }
return return true
} }
txt.print("?invalid operand (assign)\n") txt.print("?invalid operand\n")
txt.print(" nlen=") return false
txt.print_ub(nlen)
txt.print(" word=")
txt.print(word_addrs[2])
txt.nl()
} }
sub do_label_or_instr() { sub do_label_andor_instr() -> ubyte {
uword label_ptr = 0 uword label_ptr = 0
uword instr_ptr = 0 uword instr_ptr = 0
uword operand_ptr = 0 uword operand_ptr = 0
@ -134,42 +200,87 @@ textparse {
@(lastlabelchar) = 0 @(lastlabelchar) = 0
if instructions.match(label_ptr) { if instructions.match(label_ptr) {
txt.print("?label cannot be a mnemonic\n") txt.print("?label cannot be a mnemonic\n")
return return false
} }
set_symbol(label_ptr, program_counter) set_symbol(label_ptr, program_counter)
} }
if instr_ptr { if instr_ptr {
; txt.print("instr: ") if @(instr_ptr)=='.'
; txt.print(instr_ptr) return process_assembler_directive(instr_ptr, operand_ptr)
; txt.nl()
; if operand_ptr { return assemble_instruction(instr_ptr, operand_ptr)
; txt.print("operand: ")
; txt.print(operand_ptr)
; txt.nl()
; }
assemble_instruction(instr_ptr, operand_ptr)
}
} }
sub assemble_instruction(uword instr_ptr, uword operand_ptr) { return true ; empty line
}
sub assemble_instruction(uword instr_ptr, uword operand_ptr) -> ubyte {
uword instruction_info_ptr = instructions.match(instr_ptr) uword instruction_info_ptr = instructions.match(instr_ptr)
if instruction_info_ptr { if instruction_info_ptr {
; we got a mnemonic match, now process the operand (and its value, if applicable, into cx16.r15) ; we got a mnemonic match, now process the operand (and its value, if applicable, into cx16.r15)
ubyte addr_mode = parse_operand(operand_ptr) ubyte addr_mode = parse_operand(operand_ptr)
if addr_mode { if addr_mode {
txt.print("operand ok, addr-mode=")
txt.print_ub(addr_mode)
txt.nl()
ubyte opcode = instructions.opcode(instruction_info_ptr, addr_mode) ubyte opcode = instructions.opcode(instruction_info_ptr, addr_mode)
if_cc { if_cc {
; most likely an invalid instruction BUT could also be a branching instruction
; that needs its "absolute" operand recalculated as relative.
ubyte retry = false
when addr_mode {
instructions.am_Abs -> {
if @(instr_ptr)=='b' {
addr_mode = instructions.am_Rel
if not calc_relative_branch_into_r14()
return false
cx16.r15 = cx16.r14
retry = true
}
}
instructions.am_Imp -> {
addr_mode = instructions.am_Acc
retry = true
}
instructions.am_Izp -> {
addr_mode = instructions.am_Ind
retry = true
}
instructions.am_Zp -> {
addr_mode = instructions.am_Abs
retry = true
}
}
if retry
opcode = instructions.opcode(instruction_info_ptr, addr_mode)
if not opcode {
txt.print("?invalid instruction\n") txt.print("?invalid instruction\n")
} else { return false
ubyte num_operand_bytes = operand_size[addr_mode-1] }
}
if addr_mode==instructions.am_Zpr {
; instructions like BBR4 $zp,$aaaa
; TODO parse second part of the operand
; if not calc_relative_branch_into_r14()
; return false
; cx16.r15 |= (cx16.r14 << 8)
; txt.print("TODO ZPR addrmode\n")
; txt.print("opcode=")
; txt.print_ubhex(opcode,1)
; txt.print(" op1=")
; txt.print_ubhex(lsb(cx16.r15),1)
; txt.print(" op2=")
; txt.print_ubhex(msb(cx16.r15),1)
; return false
}
ubyte num_operand_bytes = operand_size[addr_mode]
if print_emit_bytes {
txt.chrout(' ') txt.chrout(' ')
txt.print_uwhex(program_counter, 1) txt.print_uwhex(program_counter, 1)
txt.print(" ") txt.print(" ")
}
emit(opcode) emit(opcode)
if num_operand_bytes==1 { if num_operand_bytes==1 {
emit(lsb(cx16.r15)) emit(lsb(cx16.r15))
@ -177,14 +288,29 @@ textparse {
emit(lsb(cx16.r15)) emit(lsb(cx16.r15))
emit(msb(cx16.r15)) emit(msb(cx16.r15))
} }
if print_emit_bytes
txt.nl() txt.nl()
return true
} }
return txt.print("?invalid operand\n")
} return false
txt.print("?invalid operand (instr)\n")
return
} }
txt.print("?invalid instruction\n") txt.print("?invalid instruction\n")
return false
}
sub calc_relative_branch_into_r14() -> ubyte {
    ; Stores (cx16.r15 - program_counter - 2) in cx16.r14 and checks that the
    ; 16-bit result fits a signed 8-bit displacement.
    ; Returns true when cx16.r14 is in $0000..$007f (forward) or $ff80..$ffff
    ; (backward); otherwise prints "?branch out of range" and returns false.
    ; cx16.r14 is overwritten in both cases.
    ; NOTE(review): cx16.r15 is presumably the absolute branch target left by
    ; the operand parser - confirm against the caller.
    cx16.r14 = cx16.r15 - program_counter - 2

    ; unsigned compare: everything strictly between $007f and $ff80 is unreachable
    if cx16.r14 <= $007f or cx16.r14 >= $ff80
        return true

    txt.print("?branch out of range\n")
    return false
}
sub parse_operand(uword operand_ptr) -> ubyte { sub parse_operand(uword operand_ptr) -> ubyte {
@ -192,7 +318,7 @@ textparse {
; - addressing mode id as result value or 0 (am_Invalid) when error ; - addressing mode id as result value or 0 (am_Invalid) when error
; - operand numeric value in cx16.r15 (if applicable) ; - operand numeric value in cx16.r15 (if applicable)
ubyte firstchr = @(operand_ptr) ubyte @zp firstchr = @(operand_ptr)
ubyte parsed_len ubyte parsed_len
when firstchr { when firstchr {
0 -> return instructions.am_Imp 0 -> return instructions.am_Imp
@ -239,7 +365,7 @@ textparse {
} }
} }
'$', '%', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { '$', '%', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
; address optionally followed by ,x or ,y ; address optionally followed by ,x or ,y or ,address
parsed_len = conv.any2uword(operand_ptr) parsed_len = conv.any2uword(operand_ptr)
if parsed_len { if parsed_len {
operand_ptr += parsed_len operand_ptr += parsed_len
@ -259,6 +385,10 @@ textparse {
return instructions.am_ZpX return instructions.am_ZpX
if str_is2(operand_ptr, ",y") if str_is2(operand_ptr, ",y")
return instructions.am_ZpY return instructions.am_ZpY
if @(operand_ptr)==',' {
; assume BBR $zp,$aaaa or BBS $zp,$aaaa
return instructions.am_Zpr
}
} }
} }
} }
@ -266,6 +396,46 @@ textparse {
return instructions.am_Invalid return instructions.am_Invalid
} }
sub process_assembler_directive(uword directive, uword operand) -> ubyte {
    ; Handles an assembler directive. Only ".byte" is recognised right now.
    ;   directive: pointer to the directive text (compared against ".byte")
    ;   operand:   pointer to the operand text, or 0 when there is none
    ; ".byte" takes one or more comma separated numeric values, each parsed by
    ; conv.any2uword (value is read back from cx16.r15) and emitted as one byte.
    ; Returns true on success. Returns false after printing
    ;   "?byte value too large" when a value does not fit in 8 bits, or
    ;   "?syntax error" for an unknown directive, a missing operand, a missing
    ;   or malformed value (also after a comma), or trailing characters after
    ;   the last value.
    ; Bytes emitted before an error was detected are not undone.
    if string.compare(directive, ".byte")==0 {
        if operand {
            ubyte length
            length = conv.any2uword(operand)
            if length {
                if msb(cx16.r15) {
                    txt.print("?byte value too large\n")
                    return false
                }
                if print_emit_bytes {
                    txt.chrout(' ')
                    txt.print_uwhex(program_counter, 1)
                    txt.print(" ")
                }
                emit(lsb(cx16.r15))
                operand += length
                while @(operand)==',' {
                    operand++
                    length = conv.any2uword(operand)
                    if not length {
                        ; a comma must be followed by another value; previously this
                        ; ended the list silently and reported success
                        txt.print("?syntax error\n")
                        return false
                    }
                    if msb(cx16.r15) {
                        txt.print("?byte value too large\n")
                        return false
                    }
                    emit(lsb(cx16.r15))
                    operand += length
                }
                if @(operand)!=0 {
                    ; the value list must run up to the end of the operand string,
                    ; the same rule do_assign applies to its operand
                    txt.print("?syntax error\n")
                    return false
                }
                if print_emit_bytes
                    txt.nl()
                return true
            }
        }
    }
    txt.print("?syntax error\n")
    return false
}
asmsub str_is1(uword st @R0, ubyte char @A) clobbers(Y) -> ubyte @A { asmsub str_is1(uword st @R0, ubyte char @A) clobbers(Y) -> ubyte @A {
%asm {{ %asm {{
cmp (cx16.r0) cmp (cx16.r0)
@ -319,9 +489,11 @@ _is_2_entry
@(program_counter) = value @(program_counter) = value
program_counter++ program_counter++
if print_emit_bytes {
txt.print_ubhex(value, 0) txt.print_ubhex(value, 0)
txt.chrout(' ') txt.chrout(' ')
} }
}
sub set_symbol(uword symbolname_ptr, uword value) { sub set_symbol(uword symbolname_ptr, uword value) {
txt.print("symbol: ") txt.print("symbol: ")
@ -341,13 +513,13 @@ _is_2_entry
; first strip the input string of extra whitespace and comments ; first strip the input string of extra whitespace and comments
ubyte copying_word = false ubyte copying_word = false
ubyte word_count ubyte word_count
ubyte char_idx = 0 ubyte @zp char_idx = 0
word_addrs[0] = 0 word_addrs[0] = 0
word_addrs[1] = 0 word_addrs[1] = 0
word_addrs[2] = 0 word_addrs[2] = 0
ubyte char ubyte @zp char
for char in input_line { for char in input_line {
when char { when char {
' ', 9, 160 -> { ' ', 9, 160 -> {
@ -389,13 +561,14 @@ _is_2_entry
} }
sub preprocess_assignment_spacing() { sub preprocess_assignment_spacing() {
; TODO optimize this... only do this if a valid instruction couldn't be parsed? if not string.find(input_line, '=')
return
; split the line around the '='
str input_line2 = "?" * 40 str input_line2 = "?" * 40
uword src = &input_line uword src = &input_line
uword dest = &input_line2 uword dest = &input_line2
ubyte changed = 0 ubyte @zp cc
ubyte cc
for cc in input_line { for cc in input_line {
if cc=='=' { if cc=='=' {
@(dest) = ' ' @(dest) = ' '
@ -403,16 +576,13 @@ _is_2_entry
@(dest) = '=' @(dest) = '='
dest++ dest++
cc = ' ' cc = ' '
changed++
} }
@(dest) = cc @(dest) = cc
dest++ dest++
} }
if changed {
@(dest)=0 @(dest)=0
void string.copy(input_line2, src) void string.copy(input_line2, src)
} }
}
} }
instructions { instructions {
@ -434,7 +604,7 @@ instructions {
const ubyte am_Izp = 15 const ubyte am_Izp = 15
const ubyte am_IaX = 16 const ubyte am_IaX = 16
; TODO: explore (benchmark) hash based matchers ; TODO: explore (benchmark) hash based matchers. Faster (although the bulk of the time is not in the mnemonic matching)? Less memory?
asmsub match(uword mnemonic_ptr @AY) -> uword @AY { asmsub match(uword mnemonic_ptr @AY) -> uword @AY {
; -- input: mnemonic_ptr in AY, output: pointer to instruction info structure or $0000 in AY ; -- input: mnemonic_ptr in AY, output: pointer to instruction info structure or $0000 in AY