This commit is contained in:
Irmen de Jong 2021-01-05 22:56:52 +01:00
parent 7d6a0ab256
commit 76101d7f8d
6 changed files with 450 additions and 217 deletions

View File

@ -3,6 +3,8 @@ TODO
==== ====
- move all str* builtin functions to a strings library module, mem* to the sys module. update docs. - move all str* builtin functions to a strings library module, mem* to the sys module. update docs.
- use (zp) addressing mode on 65c02 specific code rather than ldy#0 / lda (zp),y
- optimize pointer access code @(pointer)? use a subroutine? macro? 65c02 vs 6502?
- can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50 - can we get rid of the --longOptionName command line options and only keep the short versions? https://github.com/Kotlin/kotlinx-cli/issues/50
- detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation - detect variables that are written but never read - mark those as unused too and remove them, such as uword unused = memory("unused222", 20) - also remove the memory slab allocation
- hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine) - hoist all variable declarations up to the subroutine scope *before* even the constant folding takes place (to avoid undefined symbol errors when referring to a variable from another nested scope in the subroutine)

View File

@ -0,0 +1,8 @@
# Regenerates the files derived from gen_opcodes.py.

# 'all' names no real file, so keep a stray file called 'all' from masking it.
.PHONY: all

# Both recipes write their target through a shell redirect, which creates the
# file before the command has succeeded. Delete the target when a recipe fails
# so a truncated file is not treated as up to date on the next run.
.DELETE_ON_ERROR:

all: perfecthash.c opcodes.asm

# Mnemonic list piped through gperf to produce the perfect-hash C source.
perfecthash.c: gen_opcodes.py
	python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c

# Assembly source for the mnemonic parser.
opcodes.asm: gen_opcodes.py
	python gen_opcodes.py --parser > opcodes.asm

View File

@ -61,12 +61,8 @@ textparse {
return return
} }
uword value = parse_number(word_addrs[2]) uword value = conv.any2uword(word_addrs[2])
if strcmp("*", word_addrs[0])==0 { if strcmp("*", word_addrs[0])==0 {
if value == $ffff {
txt.print("?invalid address\n")
return
}
program_counter = value program_counter = value
} else { } else {
set_symbol(word_addrs[0], value) set_symbol(word_addrs[0], value)
@ -151,19 +147,6 @@ textparse {
emit(lsb(cx16.r0)) emit(lsb(cx16.r0))
emit(msb(cx16.r0)) emit(msb(cx16.r0))
} }
repeat 2-num_operand_bytes {
txt.print(" ")
}
txt.chrout(' ')
txt.print(word_addrs[0])
if word_addrs[1] {
txt.chrout(' ')
txt.print(word_addrs[1])
}
if word_addrs[2] {
txt.chrout(' ')
txt.print(word_addrs[2])
}
txt.chrout('\n') txt.chrout('\n')
} }
} else { } else {
@ -207,7 +190,6 @@ textparse {
; -- returns true/false success status, the value is in cx16.r0 if successful ; -- returns true/false success status, the value is in cx16.r0 if successful
; TODO number parsing error detection ; TODO number parsing error detection
; TODO optimize this (coalesce various parsing options) ; TODO optimize this (coalesce various parsing options)
; TODO fix number parsing by ending the number with \0 after the last digit
when addr_mode { when addr_mode {
instructions.am_Imp, instructions.am_Acc -> { instructions.am_Imp, instructions.am_Acc -> {
@ -216,49 +198,76 @@ textparse {
} }
instructions.am_Imm -> { instructions.am_Imm -> {
; lda #$12 ; lda #$12
cx16.r0 = parse_number(operand_ptr+1) terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1) debug_print_value(operand_ptr+1)
return true return true
} }
instructions.am_Zp, instructions.am_Zpr -> { instructions.am_Zp -> {
; lda $02 / brr0 $12,label ; lda $02
cx16.r0 = parse_number(operand_ptr) terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
}
instructions.am_Zpr -> {
; brr0 $12,label
; TODO parse the label, relative offset
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr) debug_print_value(operand_ptr)
return true return true
} }
instructions.am_ZpX, instructions.am_ZpY -> { instructions.am_ZpX, instructions.am_ZpY -> {
; lda $02,x / lda $02,y ; lda $02,x / lda $02,y
cx16.r0 = parse_number(operand_ptr) ; TODO parse the ,x/y
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr) debug_print_value(operand_ptr)
return true return true
} }
instructions.am_Rel -> { instructions.am_Rel -> {
cx16.r0 = parse_number(operand_ptr) ; bcc $c000
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
; TODO calculate relative offset to current programcounter ; TODO calculate relative offset to current programcounter
debug_print_value(operand_ptr) debug_print_value(operand_ptr)
return true return true
} }
instructions.am_Abs -> { instructions.am_Abs -> {
; jmp $1234 ; jmp $1234
cx16.r0 = parse_number(operand_ptr) terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr) debug_print_value(operand_ptr)
return true return true
} }
instructions.am_AbsX, instructions.am_AbsY -> { instructions.am_AbsX, instructions.am_AbsY -> {
; sta $3000,x / sta $3000,y ; sta $3000,x / sta $3000,y
cx16.r0 = parse_number(operand_ptr) ; TODO parse the ,x/,y
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr) debug_print_value(operand_ptr)
return true return true
} }
instructions.am_Ind -> { instructions.am_Ind -> {
; jmp ($fffc) ; jmp ($fffc)
cx16.r0 = parse_number(operand_ptr+1) terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1) debug_print_value(operand_ptr+1)
return true return true
} }
instructions.am_IzX, instructions.am_IzY, instructions.am_Izp, instructions.am_IaX -> { instructions.am_IzX, instructions.am_IzY, instructions.am_IaX -> {
; lda ($02,x) / lda ($02),y / lda ($02) / jmp ($a000,x) ; lda ($02,x) / lda ($02),y / jmp ($a000,x)
cx16.r0 = parse_number(operand_ptr+1) ; TODO parse the ,x/,y
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1)
return true
}
instructions.am_Izp -> {
; lda ($02)
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1) debug_print_value(operand_ptr+1)
return true return true
} }
@ -275,14 +284,20 @@ textparse {
} }
} }
sub terminate_number(uword strptr) {
sub parse_number(uword strptr) -> uword { ; replace the first terminating character after a number (such as a , or close parens)
; TODO move to conv module and optimize ; with a 0 to terminate the number and make the parse routine happy.
if @(strptr)=='$' ; TODO remove this once the various conv routines are more robust and stop at a non-digit
return conv.hex2uword(strptr) repeat {
if @(strptr)=='%' when @(strptr) {
return conv.bin2uword(strptr) 0 -> return
return conv.str2uword(strptr) ',', ')', ' ', 9, '\n' -> {
@(strptr) = 0
return
}
}
strptr++
}
} }
sub split_input() { sub split_input() {
@ -442,8 +457,9 @@ instructions {
'a' -> { 'a' -> {
if @(operand_ptr+1) == 0 if @(operand_ptr+1) == 0
return am_Acc return am_Acc
; some expression TODO ; some expression
return am_Invalid ; zp or absolute depends on the value of the symbol referenced
return am_Invalid ; TODO
} }
'#' -> { '#' -> {
if @(operand_ptr+1) if @(operand_ptr+1)
@ -452,24 +468,28 @@ instructions {
} }
'(' -> { '(' -> {
; some indirect TODO ; some indirect TODO
; can be (zp), (zp,x), (zp),y, (abs), (abs,x)
if @(operand_ptr+1) if @(operand_ptr+1)
return am_Ind return am_Ind
return am_Invalid return am_Invalid
} }
'$' -> { '$' -> {
; hex address TODO ; hex address TODO
; can be followed by ,x or ,y
if @(operand_ptr+1) if @(operand_ptr+1)
return am_Abs return am_Abs
return am_Invalid return am_Invalid
} }
'%' -> { '%' -> {
; bin address TODO ; bin address TODO
; can be followed by ,x or ,y
if @(operand_ptr+1) if @(operand_ptr+1)
return am_Abs return am_Abs
return am_Invalid return am_Invalid
} }
'0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' -> {
; absolute or indexed address TODO ; absolute or indexed address TODO
; can be followed by ,x or ,y
return am_Abs return am_Abs
} }
} }

View File

@ -1,3 +1,4 @@
import sys
from collections import Counter from collections import Counter
from enum import IntEnum from enum import IntEnum
@ -299,15 +300,15 @@ for ins in Instructions:
else: else:
InstructionsByMode[ins[2]].append((ins[1], ins[0])) InstructionsByMode[ins[2]].append((ins[1], ins[0]))
# build the name->modes table
print("; generated by opcodes.py") def generate_mnemonics_parser():
print("; addressing modes:") print("; generated by opcodes.py")
for mode in AddrMode: print("; addressing modes:")
for mode in AddrMode:
print(";", mode.value, "=", mode.name) print(";", mode.value, "=", mode.name)
print() print()
print(""" print("""
.enc "petscii" ;define an ascii to petscii encoding .enc "petscii" ;define an ascii to petscii encoding
.cdef " @", 32 ;characters .cdef " @", 32 ;characters
.cdef "AZ", $c1 .cdef "AZ", $c1
@ -315,9 +316,9 @@ print("""
.cdef "[[", $5b .cdef "[[", $5b
.cdef "]]", $5d .cdef "]]", $5d
.edef "<nothing>", [];replace with no bytes .edef "<nothing>", [];replace with no bytes
""") """)
for instr in sorted(InstructionsByName.items()): for instr in sorted(InstructionsByName.items()):
print("i_" + instr[0] + ":\n\t.byte ", end="") print("i_" + instr[0] + ":\n\t.byte ", end="")
if len(instr[1]) == 1: if len(instr[1]) == 1:
# many instructions have just 1 addressing mode, save space for those # many instructions have just 1 addressing mode, save space for those
@ -334,8 +335,7 @@ for instr in sorted(InstructionsByName.items()):
print(",".join(str(o) for o in mode_opcodes), end="") print(",".join(str(o) for o in mode_opcodes), end="")
print() print()
def determine_mnemonics():
def determine_mnemonics():
mnemonics = list(sorted(set(ins[1] for ins in Instructions))) mnemonics = list(sorted(set(ins[1] for ins in Instructions)))
# opcodes histogram (ordered by occurrence) (in kernal + basic roms of the c64): # opcodes histogram (ordered by occurrence) (in kernal + basic roms of the c64):
@ -383,32 +383,26 @@ def determine_mnemonics():
raise ValueError("mnem count mismatch") raise ValueError("mnem count mismatch")
return mnem2 return mnem2
mnemonics = determine_mnemonics()
mnemonics = determine_mnemonics() def first_letters():
def first_letters():
firstletters = {m[0]: 0 for m in mnemonics} firstletters = {m[0]: 0 for m in mnemonics}
return firstletters.keys() return firstletters.keys()
def second_letters(firstletter):
def second_letters(firstletter):
secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter} secondletters = {m[1]: 0 for m in mnemonics if m[0] == firstletter}
return secondletters.keys() return secondletters.keys()
def third_letters(firstletter, secondletter):
def third_letters(firstletter, secondletter):
thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter} thirdletters = {m[2]: 0 for m in mnemonics if m[0] == firstletter and m[1] == secondletter}
return thirdletters.keys() return thirdletters.keys()
def fourth_letters(firstletter, secondletter, thirdletter):
def fourth_letters(firstletter, secondletter, thirdletter):
longmnem = [m for m in mnemonics if len(m) > 3] longmnem = [m for m in mnemonics if len(m) > 3]
fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter} fourthletters = {m[3]: 0 for m in longmnem if m[0] == firstletter and m[1] == secondletter and m[2] == thirdletter}
return fourthletters.keys() return fourthletters.keys()
def make_tree():
def make_tree():
tree = {} tree = {}
for first in first_letters(): for first in first_letters():
tree[first] = { tree[first] = {
@ -423,14 +417,12 @@ def make_tree():
} }
return tree return tree
tree = make_tree()
tree = make_tree() print("get_opcode_info .proc")
print("_mnem_fourth_letter = cx16.r4")
print("_mnem_fifth_letter = cx16.r5")
print("get_opcode_info .proc") for first in tree:
print("_mnem_fourth_letter = cx16.r4")
print("_mnem_fifth_letter = cx16.r5")
for first in tree:
print(" cmp #'%s'" % first) print(" cmp #'%s'" % first)
print(" bne _not_%s" % first) print(" bne _not_%s" % first)
for second in tree[first]: for second in tree[first]:
@ -453,17 +445,17 @@ for first in tree:
print("_not_%s%s%s:" % (first, second, third)) print("_not_%s%s%s:" % (first, second, third))
print("_not_%s%s:" % (first, second)) print("_not_%s%s:" % (first, second))
print("_not_%s:" % first) print("_not_%s:" % first)
print("_invalid:") print("_invalid:")
print(" lda #0") print(" lda #0")
print(" ldy #0") print(" ldy #0")
print(" rts") print(" rts")
# the 4-letter mnemonics are: # the 4-letter mnemonics are:
# smb[0-7] # smb[0-7]
# bbr[0-7] # bbr[0-7]
# rmb[0-7] # rmb[0-7]
# bbs[0-7] # bbs[0-7]
for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
print("_check_%s" % fourlettermnemonic) print("_check_%s" % fourlettermnemonic)
print(" lda #<_tab_%s" % fourlettermnemonic) print(" lda #<_tab_%s" % fourlettermnemonic)
print(" ldy #>_tab_%s" % fourlettermnemonic) print(" ldy #>_tab_%s" % fourlettermnemonic)
@ -471,7 +463,7 @@ for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
sty P8ZP_SCRATCH_W2+1 sty P8ZP_SCRATCH_W2+1
bra _check4""") bra _check4""")
print("""_check4 print("""_check4
lda _mnem_fourth_letter lda _mnem_fourth_letter
cmp #'0' cmp #'0'
bcc _invalid bcc _invalid
@ -488,9 +480,23 @@ print("""_check4
pla pla
rts""") rts""")
for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]: for fourlettermnemonic in ["smb", "bbr", "rmb", "bbs"]:
print("_tab_%s" % fourlettermnemonic) print("_tab_%s" % fourlettermnemonic)
for ii in "01234567": for ii in "01234567":
print(" .word i_%s%s" % (fourlettermnemonic, ii)) print(" .word i_%s%s" % (fourlettermnemonic, ii))
print(" .pend") print(" .pend")
def generate_mnem_list():
    """Print every key of InstructionsByName in upper case, one per line, in sorted key order."""
    ordered_names = sorted(InstructionsByName)
    for upper_name in map(str.upper, ordered_names):
        print(upper_name)
if __name__=="__main__":
    # The first command line argument selects what is written to stdout:
    #   --mnemlist  the upper-cased mnemonic list
    #   --parser    the generated mnemonics parser source
    if len(sys.argv) < 2:
        # Without this check sys.argv[1] fails with a bare IndexError.
        sys.exit("usage: gen_opcodes.py --mnemlist | --parser")
    if sys.argv[1]=="--mnemlist":
        generate_mnem_list()
    elif sys.argv[1]=="--parser":
        generate_mnemonics_parser()
    else:
        # stdout is normally redirected into a generated file, so report the
        # problem on stderr and exit non-zero instead of printing it there.
        sys.exit("invalid arg")

View File

@ -0,0 +1,17 @@
import re
# Dumps the word list found in perfecthash.c as "index word" lines, printing "-" for empty slots.

# Read the whole source up front; the context manager closes the file handle again.
with open("perfecthash.c", "rt") as source_file:
    hashcode = source_file.read()

# The slot initialisers are the comma separated items between the first '{' and the
# next '}' that follow the first occurrence of "wordlist".
entries = hashcode.split("wordlist")[1].split("{")[1].split("}")[0].strip().split(",")

max_hash_match = re.search(r"MAX_HASH_VALUE = (\d+)", hashcode)
if max_hash_match is None:
    # Fail with a clear message instead of an AttributeError on None.
    raise ValueError("MAX_HASH_VALUE not found in perfecthash.c")
max_hash_value = int(max_hash_match.group(1))

# One slot is expected for every hash value from 0 up to and including the maximum.
if len(entries) != max_hash_value+1:
    raise ValueError("inconsistent number of entries parsed")

entries = [e.strip() for e in entries]
# Initialisers ending in '0' are treated as empty slots (presumably the null pointer
# filler emitted because of gperf's --null-strings -- confirm against the generated file);
# all other initialisers are quoted words and lose their surrounding quotes.
entries = [None if e.endswith('0') else e.strip('"') for e in entries]

for ix, entry in enumerate(entries):
    print(ix, entry or "-")

View File

@ -0,0 +1,180 @@
# Parameters of the perfect-hash keyword lookup implemented in this file.
# number of non-None entries in wordlist
TOTAL_KEYWORDS = 98
# shortest and longest keyword; the same bounds in_word_set checks before hashing
MIN_WORD_LENGTH = 3
MAX_WORD_LENGTH = 4
# index of the first occupied wordlist slot
MIN_HASH_VALUE = 2
# index of the last wordlist slot; in_word_set rejects any larger hash value
MAX_HASH_VALUE = 134
def hash(string: str, length: int) -> int:
    """Compute the perfect-hash value of a candidate keyword.

    The value is the sum of per-character weights for the characters at
    positions 3, 2 and 1 (as far as ``length`` reaches) plus position 0.
    The weight for position 1 is looked up one table entry further
    (character code + 1).

    :param string: the candidate word; must contain at least one character
        and at least ``length`` characters for lengths up to 4.
    :param length: number of characters of ``string`` to take into account.
    :return: the hash value. Characters whose table index falls outside the
        weight table contribute the table's filler weight (135), so the
        result is larger than any occupied slot index instead of an
        IndexError being raised.
    :raises IndexError: if ``string`` is empty or shorter than ``length`` implies.
    """
    # A tuple literal is a compile-time constant, so the table is not rebuilt on every call.
    asso_values = (
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 65, 62,
        61, 58, 57, 54, 47, 46, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 26, 4, 1, 2, 33,
        2, 135, 135, 15, 69, 4, 30, 10, 52, 17,
        3, 34, 13, 0, 5, 29, 7, 69, 18, 6,
        53, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135)
    # Same value the table itself uses for every character that is not part of a keyword.
    out_of_table = 135

    def weight(index: int) -> int:
        # The table only has 129 entries; any Python character beyond that
        # (non-ASCII input) is weighted like an in-table non-keyword character.
        return asso_values[index] if index < len(asso_values) else out_of_table

    hval = 0
    if length > 3:
        hval += weight(ord(string[3]))
    if length > 2:
        hval += weight(ord(string[2]))
    if length > 1:
        # position 1 deliberately uses the entry one past the character code
        hval += weight(ord(string[1])+1)
    hval += weight(ord(string[0]))
    return hval
# Slot table of the perfect hash: the keyword stored at index i is the one whose
# hash() value is i. None marks a slot that no keyword hashes to.
# in_word_set looks up the slot at a candidate's hash value and compares it with the candidate.
wordlist = [
None,
None,
"SBC",
"SEC",
"SED",
"DEC",
"BCS",
"BCC",
"BRK",
"TRB",
"DEY",
"TXS",
"CLC",
"CLD",
"TSB",
"TAY",
"PLP",
"SEI",
"CLV",
"PLY",
None,
"PHP",
"DEX",
None,
"PHY",
None,
"CLI",
"TAX",
"TSX",
"ROR",
"BRA",
"PLX",
"STP",
"INC",
None,
"STY",
"PHX",
"TXA",
"INY",
"PLA",
"BEQ",
"CPY",
"RTS",
"ORA",
"PHA",
"AND",
"ROL",
"STX",
"LSR",
"EOR",
"INX",
"BBS7",
"BBS6",
"CPX",
"BNE",
"STA",
"CMP",
"RTI",
"NOP",
"BBS5",
"ADC",
"ASL",
"BBS4",
"BBS3",
"BBR7",
"BBR6",
"BBS2",
"BBS1",
"BPL",
"LDY",
"BBS0",
"BMI",
"BBR5",
"BVS",
"BVC",
"BBR4",
"BBR3",
None,
"BIT",
"BBR2",
"BBR1",
"LDX",
"STZ",
"BBR0",
"TYA",
None,
None,
"JSR",
"WAI",
"LDA",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"SMB7",
"SMB6",
None,
None,
None,
None,
None,
None,
"SMB5",
None,
None,
"SMB4",
"SMB3",
"RMB7",
"RMB6",
"SMB2",
"SMB1",
None,
None,
"SMB0",
None,
"RMB5",
"JMP",
None,
"RMB4",
"RMB3",
None,
None,
"RMB2",
"RMB1",
None,
None,
"RMB0"
]
def in_word_set(string: str) -> bool:
    """Tell whether ``string`` is one of the keywords stored in ``wordlist``.

    :param string: the candidate word; the comparison is exact (case sensitive).
    :return: True if the slot at the candidate's hash value holds exactly this
        word. False if the length is outside MIN_WORD_LENGTH..MAX_WORD_LENGTH,
        the hash value is beyond MAX_HASH_VALUE, or the slot is empty or holds
        a different word.
    """
    length = len(string)
    # Use the file's length constants (3 and 4) instead of repeating the literals.
    if not MIN_WORD_LENGTH <= length <= MAX_WORD_LENGTH:
        return False
    key = hash(string, length)
    if key > MAX_HASH_VALUE:
        return False
    # An empty slot holds None, which never equals a str, so this is always a
    # real bool (the previous 'word and word==string' leaked None to the caller).
    return wordlist[key] == string