This commit is contained in:
Irmen de Jong 2021-01-06 00:23:54 +01:00
parent 07dd64958f
commit 4305984168
4 changed files with 335 additions and 252 deletions

View File

@ -1,7 +1,7 @@
# Default goal: build both generated artifacts.
all: perfecthash.c opcodes.asm
# perfecthash.c depends on gen_opcodes.py: the output of "gen_opcodes.py --mnemlist"
# is piped through gperf and captured in perfecthash.c.
# Two gperf command lines are listed below; they differ only in one switch
# (-G in the first, -D in the second).
perfecthash.c: gen_opcodes.py
python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c
python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -D -E -m 100 > perfecthash.c
# opcodes.asm depends on gen_opcodes.py as well: it is the captured output of
# "gen_opcodes.py --parser".
opcodes.asm: gen_opcodes.py
python gen_opcodes.py --parser > opcodes.asm

View File

@ -12,7 +12,6 @@ main {
txt.print("\nAssembler.\nEmpty line to stop.\n")
textparse.user_input()
; benchmark.benchmark()
; test_stack.test()
}
@ -198,14 +197,12 @@ textparse {
}
instructions.am_Imm -> {
; lda #$12
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1)
return true
}
instructions.am_Zp -> {
; lda $02
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
@ -213,7 +210,6 @@ textparse {
instructions.am_Zpr -> {
; brr0 $12,label
; TODO parse the label, relative offset
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
@ -221,14 +217,12 @@ textparse {
instructions.am_ZpX, instructions.am_ZpY -> {
; lda $02,x / lda $02,y
; TODO parse the ,x/y
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
}
instructions.am_Rel -> {
; bcc $c000
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
; TODO calculate relative offset to current program counter
debug_print_value(operand_ptr)
@ -236,7 +230,6 @@ textparse {
}
instructions.am_Abs -> {
; jmp $1234
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
@ -244,14 +237,12 @@ textparse {
instructions.am_AbsX, instructions.am_AbsY -> {
; sta $3000,x / sta $3000,y
; TODO parse the ,x/,y
terminate_number(operand_ptr)
cx16.r0 = conv.any2uword(operand_ptr)
debug_print_value(operand_ptr)
return true
}
instructions.am_Ind -> {
; jmp ($fffc)
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1)
return true
@ -259,14 +250,12 @@ textparse {
instructions.am_IzX, instructions.am_IzY, instructions.am_IaX -> {
; lda ($02,x) / lda ($02),y / jmp ($a000,x)
; TODO parse the ,x/,y
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1)
return true
}
instructions.am_Izp -> {
; lda ($02)
terminate_number(operand_ptr+1)
cx16.r0 = conv.any2uword(operand_ptr+1)
debug_print_value(operand_ptr+1)
return true
@ -284,22 +273,6 @@ textparse {
}
}
sub terminate_number(uword strptr) {
    ; Walk the string at strptr and overwrite the first separator that follows the number
    ; (comma, closing paren, space, tab (9) or newline) with a 0 byte, so the number
    ; becomes a properly terminated string for the conversion routine.
    ; A string that already ends in 0 before any separator is left unchanged.
    ; TODO remove this once the various conv routines are more robust and stop at a non-digit
    repeat {
        ubyte ch = @(strptr)
        if ch == 0
            return
        if ch == ',' or ch == ')' or ch == ' ' or ch == 9 or ch == '\n' {
            @(strptr) = 0
            return
        }
        strptr++
    }
}
sub split_input() {
; first strip the input string of extra whitespace and comments
ubyte copying_word = false
@ -378,56 +351,6 @@ textparse {
}
}
benchmark {
; Timing harness for the mnemonic matcher: runs instructions.match and instructions.opcode
; over a fixed table of 20 mnemonic / addressing-mode pairs and prints how many lookups
; were valid, how many were not, and the elapsed time.
sub benchmark() {
; Test inputs: modes[i] is the addressing-mode number requested for mnemonics[i]
; (1 is am_Imp according to the constants in the instructions block).
str[20] mnemonics = ["lda", "ldx", "ldy", "jsr", "bcs", "rts", "lda", "ora", "and", "eor", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop"]
ubyte[20] modes = [3, 4, 8, 8, 7, 1, 12, 13, 5, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
uword valid = 0
; 40000 / 20 = 2000 passes over the table, i.e. 40000 lookups in total.
const uword iterations = 40000 / len(mnemonics)
const uword amount = iterations * len(mnemonics)
txt.print("Benchmark.\nMatching ")
txt.print_uw(amount)
txt.print(" mnemonics")
; presumably resets the system timer so the RDTIM16 reading below is the elapsed time -- TODO confirm
c64.SETTIM(0,0,0)
uword total = 0
repeat iterations {
; progress indicator: a dot whenever the low byte of the running total is zero
if lsb(total)==0
txt.chrout('.')
ubyte idx
for idx in 0 to len(mnemonics)-1 {
; if_cs tests the CPU carry flag, which is presumably the "valid" result left by
; instructions.opcode -- verify against that routine.
; NOTE(review): this depends on nothing between the call and if_cs changing the carry flag.
uword instr_info = instructions.match(mnemonics[idx])
ubyte opcode = instructions.opcode(instr_info, modes[idx])
if_cs
valid++
total++
}
}
uword current_time = c64.RDTIM16()
txt.print("\nDone.\nValid: ")
txt.print_uw(valid)
txt.print("\ninvalid: ")
txt.print_uw(amount-valid)
txt.print("\ntotal: ")
txt.print_uw(total)
txt.print("\nSeconds:")
; The timer value is treated as 60 ticks per second: whole seconds first, then the
; leftover ticks (0..59) scaled to thousandths. 59*1000 still fits in a uword.
uword secs = current_time / 60
current_time = (current_time - secs*60)*1000/60
txt.print_uw(secs)
txt.chrout('.')
; left-pad the fractional part with zeros to three digits
if current_time<10
txt.chrout('0')
if current_time<100
txt.chrout('0')
txt.print_uw(current_time)
txt.chrout('\n')
}
}
instructions {
const ubyte am_Invalid = 0
const ubyte am_Imp = 1

View File

@ -0,0 +1,167 @@
%target cx16
%import test_stack
%import textio
%zeropage basicsafe
%option no_sysinit
main {
; Program entry point of the stand-alone benchmark: prints a banner, runs the
; benchmark, then calls test_stack.test() (from the imported test_stack module).
sub start() {
txt.print("\nassembler benchmark - tree match routine\n")
benchmark.benchmark()
test_stack.test()
}
}
benchmark {
; Timing harness for the mnemonic matcher: runs instructions.match and instructions.opcode
; over a fixed table of 20 mnemonic / addressing-mode pairs and prints how many lookups
; were valid, how many were not, and the elapsed time.
sub benchmark() {
; Test inputs: modes[i] is the addressing-mode number requested for mnemonics[i].
str[20] mnemonics = ["lda", "ldx", "ldy", "jsr", "bcs", "rts", "lda", "ora", "and", "eor", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop"]
ubyte[20] modes = [3, 4, 8, 8, 7, 1, 12, 13, 5, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
uword valid = 0
; 40000 / 20 = 2000 passes over the table, i.e. 40000 lookups in total.
const uword iterations = 40000 / len(mnemonics)
const uword amount = iterations * len(mnemonics)
txt.print("matching ")
txt.print_uw(amount)
txt.print(" mnemonics")
; presumably resets the system timer so the RDTIM16 reading below is the elapsed time -- TODO confirm
c64.SETTIM(0,0,0)
uword total = 0
repeat iterations {
; progress indicator: a dot whenever the low byte of the running total is zero
if lsb(total)==0
txt.chrout('.')
ubyte idx
for idx in 0 to len(mnemonics)-1 {
; instructions.opcode returns the opcode in A and reports validity in the carry flag;
; if_cs counts the lookup as valid when carry is set.
; NOTE(review): this depends on nothing between the call and if_cs changing the carry flag.
uword instr_info = instructions.match(mnemonics[idx])
ubyte opcode = instructions.opcode(instr_info, modes[idx])
if_cs
valid++
total++
}
}
uword current_time = c64.RDTIM16()
txt.print("\nvalid: ")
txt.print_uw(valid)
txt.print("\ninvalid: ")
txt.print_uw(amount-valid)
txt.print("\ntotal: ")
txt.print_uw(total)
txt.print("\n\nseconds:")
; The timer value is treated as 60 ticks per second: whole seconds first, then the
; leftover ticks (0..59) scaled to thousandths. 59*1000 still fits in a uword.
uword secs = current_time / 60
current_time = (current_time - secs*60)*1000/60
txt.print_uw(secs)
txt.chrout('.')
; left-pad the fractional part with zeros to three digits
if current_time<10
txt.chrout('0')
if current_time<100
txt.chrout('0')
txt.print_uw(current_time)
txt.chrout('\n')
}
}
instructions {
; Mnemonic lookup (match) and opcode selection (opcode), written in inline assembly.
; The lookup itself is delegated to get_opcode_info, which is not defined in this block;
; presumably it is provided by the opcodes.asm file included at the bottom.
asmsub match(uword mnemonic_ptr @AY) -> uword @AY {
; -- input: mnemonic_ptr in AY, output: pointer to instruction info structure or $0000 in AY
; The mnemonic pointer is kept in P8ZP_SCRATCH_W1. Five bytes are read from it, each masked
; with $7f (bit 7 cleared): the first three are passed to get_opcode_info in A, X and Y,
; the fourth and fifth are left in cx16.r4 and cx16.r5.
; X is saved on entry (phx) and restored before returning (plx).
; NOTE(review): all five bytes are read unconditionally, without checking for a 0 terminator,
; so for a 3-letter mnemonic the fifth read lies beyond the terminator.
%asm {{
phx
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
ldy #0
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
pha
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
pha
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
pha
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
sta cx16.r4 ; fourth letter in R4 (only exists for the few 4-letter mnemonics)
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
sta cx16.r5 ; fifth letter in R5 (should always be zero or whitespace for a valid mnemonic)
; pull the letters back in reverse order: third -> Y, second -> X, first -> A
pla
tay
pla
tax
pla
jsr get_opcode_info
plx
rts
}}
}
asmsub opcode(uword instr_info_ptr @AY, ubyte addr_mode @X) clobbers(X) -> ubyte @A, ubyte @Pc {
; -- input: instruction info struct ptr @AY, desired addr_mode @X
; output: opcode @A, valid @carrybit
; Layout of the info structure as read by this routine:
;   byte 0 non-zero: single-mode entry, byte 1 = the only addressing mode, byte 2 = opcode
;   byte 0 zero:     multi-mode entry, the byte at offset addr_mode is the opcode
; On failure A is 0 and carry is clear.
%asm {{
; a pointer whose high byte is zero is treated as "no instruction found"
cpy #0
beq _not_found
sta P8ZP_SCRATCH_W2
sty P8ZP_SCRATCH_W2+1
stx cx16.r15
; debug result address
;sec
;jsr txt.print_uwhex
;lda #13
;jsr c64.CHROUT
ldy #0
lda (P8ZP_SCRATCH_W2),y
beq _multi_addrmodes
iny
lda (P8ZP_SCRATCH_W2),y
cmp cx16.r15 ; check single possible addr.mode
bne _not_found
iny
lda (P8ZP_SCRATCH_W2),y ; get opcode
sec
rts
_not_found lda #0
clc
rts
_multi_addrmodes
ldy cx16.r15
lda (P8ZP_SCRATCH_W2),y ; check opcode for addr.mode
bne _valid
; opcode $00 usually means 'invalid' but for "brk" it is actually valid so check for "brk"
; NOTE(review): the mnemonic is read through P8ZP_SCRATCH_W1, which this routine never sets;
; match stores the mnemonic pointer there, so this check relies on match having been
; called just before and on the scratch word not having been overwritten since.
ldy #0
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
cmp #'b'
bne _not_found
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
cmp #'r'
bne _not_found
iny
lda (P8ZP_SCRATCH_W1),y
and #$7f ; lowercase
cmp #'k'
bne _not_found
lda #0
_valid sec
rts
}}
}
; opcodes.asm is the file the Makefile generates with "gen_opcodes.py --parser"
%asminclude "opcodes.asm", ""
}

View File

@ -1,180 +1,173 @@
TOTAL_KEYWORDS = 98
MIN_WORD_LENGTH = 3
MAX_WORD_LENGTH = 4
MIN_HASH_VALUE = 2
MAX_HASH_VALUE = 134
def hash(string: str, length: int) -> int:
    """Return the gperf-style hash value of a mnemonic.

    The value is the sum of association-table entries selected by the
    character codes of the keyword:

    * ``string[3]`` (only when ``length > 3``),
    * ``string[2]`` (only when ``length > 2``),
    * ``string[1]`` with its character code incremented by one
      (only when ``length > 1``),
    * ``string[0]`` (always).

    Characters that do not occur in any keyword map to 135, which is one more
    than the largest valid hash value (134), so a sum containing such a
    character can never be a valid table index.

    :param string: the candidate keyword; must contain at least one character
        and only characters with codes below 128.
    :param length: number of characters of ``string`` to take into account.
    :return: the hash value (sum of the selected table entries).
    :raises IndexError: if ``string`` is empty, shorter than ``length``
        requires, or contains a character code outside the table.
    """
    # 129 entries: indices 0..127 for 7-bit character codes, plus one extra
    # slot because the second character is looked up at ord(...) + 1.
    asso_values = [
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 65, 62,
        61, 58, 57, 54, 47, 46, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 26, 4, 1, 2, 33,
        2, 135, 135, 15, 69, 4, 30, 10, 52, 17,
        3, 34, 13, 0, 5, 29, 7, 69, 18, 6,
        53, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135, 135,
        135, 135, 135, 135, 135, 135, 135, 135, 135 ]
    hval = 0
    if length > 3:
        hval += asso_values[ord(string[3])]
    if length > 2:
        hval += asso_values[ord(string[2])]
    if length > 1:
        hval += asso_values[ord(string[1])+1]
    hval += asso_values[ord(string[0])]
    return hval
wordlist = [
None,
None,
"SBC",
"SEC",
"SED",
"DEC",
"BCS",
"BCC",
"BRK",
"TRB",
"DEY",
"TXS",
"CLC",
"CLD",
"TSB",
"TAY",
"PLP",
"SEI",
"CLV",
"PLY",
None,
"PHP",
"DEX",
None,
"PHY",
None,
"CLI",
"TAX",
"TSX",
"ROR",
"BRA",
"PLX",
"STP",
"INC",
None,
"STY",
"PHX",
"TXA",
"INY",
"PLA",
"BEQ",
"CPY",
"RTS",
"ORA",
"PHA",
"AND",
"ROL",
"STX",
"LSR",
"EOR",
"INX",
"BBS7",
"BBS6",
"CPX",
"BNE",
"STA",
"CMP",
"RTI",
"NOP",
"BBS5",
"ADC",
"ASL",
"BBS4",
"BBS3",
"BBR7",
"BBR6",
"BBS2",
"BBS1",
"BPL",
"LDY",
"BBS0",
"BMI",
"BBR5",
"BVS",
"BVC",
"BBR4",
"BBR3",
None,
"BIT",
"BBR2",
"BBR1",
"LDX",
"STZ",
"BBR0",
"TYA",
None,
None,
"JSR",
"WAI",
"LDA",
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
None,
"SMB7",
"SMB6",
None,
None,
None,
None,
None,
None,
"SMB5",
None,
None,
"SMB4",
"SMB3",
"RMB7",
"RMB6",
"SMB2",
"SMB1",
None,
None,
"SMB0",
None,
"RMB5",
"JMP",
None,
"RMB4",
"RMB3",
None,
None,
"RMB2",
"RMB1",
None,
None,
"RMB0"
]
def in_word_set(string: str) -> bool:
length = len(string)
if 3 <= length <= 4:
wordlist = [
"PHP",
"ROR",
"STP",
"RTS",
"RTI",
"TXS",
"PHY",
"TRB",
"EOR",
"STY",
"PHX",
"TSB",
"TAY",
"STX",
"BRK",
"LSR",
"TAX",
"TSX",
"PHA",
"PLP",
"BRA",
"STA",
"ROL",
"BCS",
"SEI",
"TXA",
"LDY",
"PLY",
"INY",
"LDX",
"PLX",
"NOP",
"INX",
"CLI",
"ASL",
"SBC",
"BMI",
"LDA",
"PLA",
"ORA",
"BNE",
"ADC",
"BBS7",
"BBR7",
"CMP",
"CPY",
"INC",
"SEC",
"BCC",
"CPX",
"BPL",
"DEY",
"TYA",
"CLV",
"DEX",
"CLC",
"BBS6",
"BBR6",
"BBS5",
"BBR5",
"SMB7",
"RMB7",
"STZ",
"SED",
"BBS4",
"BBR4",
"DEC",
"BBS3",
"BBR3",
"BBS2",
"BBR2",
"AND",
"CLD",
"BBS1",
"BBR1",
"BEQ",
"SMB6",
"RMB6",
"SMB5",
"RMB5",
"BBS0",
"BBR0",
"BVS",
"WAI",
"SMB4",
"RMB4",
"JSR",
"SMB3",
"RMB3",
"SMB2",
"RMB2",
"BIT",
"SMB1",
"RMB1",
"SMB0",
"RMB0",
"BVC",
"JMP"
]
lookup = [
-1, 0, -1, 1, 2, 3, -1, 4, 5, 6, 7, 8, 9, 10,
11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
25, 26, 27, -1, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, -1, 50,
51, 52, -1, 53, 54, 55, -1, 56, 57, 58, 59, -1, 60, 61,
62, 63, 64, 65, 66, 67, 68, 69, 70, 71, -1, 72, 73, 74,
75, 76, 77, 78, 79, 80, 81, 82, -1, 83, 84, 85, 86, 87,
88, 89, 90, 91, -1, -1, 92, 93, -1, -1, -1, -1, -1, 94,
95, -1, -1, -1, -1, 96, -1, -1, -1, -1, -1, -1, -1, 97
]
asso_values = [
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 73, 66,
61, 59, 56, 49, 47, 30, 126, 126, 126, 126,
126, 126, 126, 126, 126, 10, 3, 13, 23, 9,
25, 126, 126, 1, 90, 7, 12, 22, 35, 23,
0, 28, 1, 0, 4, 4, 12, 88, 6, 4,
33, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
126, 126, 126, 126, 126, 126, 126, 126, 126
]
print(len(lookup))
print(len(asso_values))
def hash(string: str, length: int) -> int:
    """Return the gperf-style hash value of a mnemonic.

    Sums four entries of the ``asso_values`` table (taken from the enclosing
    scope), selected by the character codes of ``string[2]``, ``string[1]``
    (code incremented by one), ``string[0]`` and the last character
    ``string[length - 1]``. For a 3-character keyword the third and the last
    character are the same, so that entry is counted twice.

    :param string: the candidate keyword; must have at least 3 characters.
    :param length: number of characters in ``string``.
    :return: the hash value (sum of the selected table entries).
    """
    return asso_values[ord(string[2])] + \
        asso_values[ord(string[1])+1] + \
        asso_values[ord(string[0])] + \
        asso_values[ord(string[length - 1])]
MAX_HASH_VALUE = 125
if 3<=length<=4:
key = hash(string, length)
if key <= MAX_HASH_VALUE:
word = wordlist[key]
return word and word==string
index = lookup[key]
if index>=0:
word = wordlist[index]
return word==string
return False
# if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
# {
# register unsigned int key = hash (str, len);
#
# if (key <= MAX_HASH_VALUE)
# {
# register int index = lookup[key];
#
# if (index >= 0)
# {
# register const char *s = wordlist[index];
#
# if (*str == *s && !strcmp (str + 1, s + 1))
# return s;
# }
# }
# }
# return 0;
# }