From 43059841684b21707cfa555f9186b7fa5d2f2a24 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Wed, 6 Jan 2021 00:23:54 +0100 Subject: [PATCH] assem --- examples/cx16/assembler/Makefile | 2 +- examples/cx16/assembler/assem.p8 | 77 ---- .../cx16/assembler/benchmark-treematch.p8 | 167 +++++++++ examples/cx16/assembler/perfecthash.py | 341 +++++++++--------- 4 files changed, 335 insertions(+), 252 deletions(-) create mode 100644 examples/cx16/assembler/benchmark-treematch.p8 diff --git a/examples/cx16/assembler/Makefile b/examples/cx16/assembler/Makefile index 1c3a152cf..7412c743a 100644 --- a/examples/cx16/assembler/Makefile +++ b/examples/cx16/assembler/Makefile @@ -1,7 +1,7 @@ all: perfecthash.c opcodes.asm perfecthash.c: gen_opcodes.py - python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -E -G -m 100 > perfecthash.c + python gen_opcodes.py --mnemlist | gperf --no-strlen --null-strings -7 -C -D -E -m 100 > perfecthash.c opcodes.asm: gen_opcodes.py python gen_opcodes.py --parser > opcodes.asm diff --git a/examples/cx16/assembler/assem.p8 b/examples/cx16/assembler/assem.p8 index 22c06a628..c51721a2d 100644 --- a/examples/cx16/assembler/assem.p8 +++ b/examples/cx16/assembler/assem.p8 @@ -12,7 +12,6 @@ main { txt.print("\nAssembler.\nEmpty line to stop.\n") textparse.user_input() - ; benchmark.benchmark() ; test_stack.test() } @@ -198,14 +197,12 @@ textparse { } instructions.am_Imm -> { ; lda #$12 - terminate_number(operand_ptr+1) cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true } instructions.am_Zp -> { ; lda $02 - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true @@ -213,7 +210,6 @@ textparse { instructions.am_Zpr -> { ; brr0 $12,label ; TODO parse the label, relative offset - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true @@ -221,14 +217,12 @@ textparse { instructions.am_ZpX, 
instructions.am_ZpY -> { ; lda $02,x / lda $02,y ; TODO parse the ,x/y - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_Rel -> { ; bcc $c000 - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) ; TODO calcualate relative offset to current programcounter debug_print_value(operand_ptr) @@ -236,7 +230,6 @@ textparse { } instructions.am_Abs -> { ; jmp $1234 - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true @@ -244,14 +237,12 @@ textparse { instructions.am_AbsX, instructions.am_AbsY -> { ; sta $3000,x / sta $3000,y ; TODO parse the ,x/,y - terminate_number(operand_ptr) cx16.r0 = conv.any2uword(operand_ptr) debug_print_value(operand_ptr) return true } instructions.am_Ind -> { ; jmp ($fffc) - terminate_number(operand_ptr+1) cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true @@ -259,14 +250,12 @@ textparse { instructions.am_IzX, instructions.am_IzY, instructions.am_IaX -> { ; lda ($02,x) / lda ($02),y / jmp ($a000,x) ; TODO parse the ,x/,y - terminate_number(operand_ptr+1) cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true } instructions.am_Izp -> { ; lda ($02) - terminate_number(operand_ptr+1) cx16.r0 = conv.any2uword(operand_ptr+1) debug_print_value(operand_ptr+1) return true @@ -284,22 +273,6 @@ textparse { } } - sub terminate_number(uword strptr) { - ; replace the first terminating character after a number (such as a , or close parens) - ; with a 0 to terminate the number and make the parse routine happy. 
- ; TODO remove this once the various conv routines are more robust and stop at a non-digit - repeat { - when @(strptr) { - 0 -> return - ',', ')', ' ', 9, '\n' -> { - @(strptr) = 0 - return - } - } - strptr++ - } - } - sub split_input() { ; first strip the input string of extra whitespace and comments ubyte copying_word = false @@ -378,56 +351,6 @@ textparse { } } -benchmark { - sub benchmark() { - str[20] mnemonics = ["lda", "ldx", "ldy", "jsr", "bcs", "rts", "lda", "ora", "and", "eor", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop"] - ubyte[20] modes = [3, 4, 8, 8, 7, 1, 12, 13, 5, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] - uword valid = 0 - - const uword iterations = 40000 / len(mnemonics) - const uword amount = iterations * len(mnemonics) - - txt.print("Benchmark.\nMatching ") - txt.print_uw(amount) - txt.print(" mnemonics") - - c64.SETTIM(0,0,0) - - uword total = 0 - repeat iterations { - if lsb(total)==0 - txt.chrout('.') - ubyte idx - for idx in 0 to len(mnemonics)-1 { - uword instr_info = instructions.match(mnemonics[idx]) - ubyte opcode = instructions.opcode(instr_info, modes[idx]) - if_cs - valid++ - total++ - } - } - - uword current_time = c64.RDTIM16() - txt.print("\nDone.\nValid: ") - txt.print_uw(valid) - txt.print("\ninvalid: ") - txt.print_uw(amount-valid) - txt.print("\ntotal: ") - txt.print_uw(total) - txt.print("\nSeconds:") - uword secs = current_time / 60 - current_time = (current_time - secs*60)*1000/60 - txt.print_uw(secs) - txt.chrout('.') - if current_time<10 - txt.chrout('0') - if current_time<100 - txt.chrout('0') - txt.print_uw(current_time) - txt.chrout('\n') - } -} - instructions { const ubyte am_Invalid = 0 const ubyte am_Imp = 1 diff --git a/examples/cx16/assembler/benchmark-treematch.p8 b/examples/cx16/assembler/benchmark-treematch.p8 new file mode 100644 index 000000000..a23c46cc3 --- /dev/null +++ b/examples/cx16/assembler/benchmark-treematch.p8 @@ -0,0 +1,167 @@ +%target cx16 +%import test_stack +%import textio 
+%zeropage basicsafe +%option no_sysinit + + +main { + + sub start() { + txt.print("\nassembler benchmark - tree match routine\n") + + benchmark.benchmark() + + test_stack.test() + } + +} + +benchmark { + sub benchmark() { + str[20] mnemonics = ["lda", "ldx", "ldy", "jsr", "bcs", "rts", "lda", "ora", "and", "eor", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop", "wai", "nop"] + ubyte[20] modes = [3, 4, 8, 8, 7, 1, 12, 13, 5, 4, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] + uword valid = 0 + + const uword iterations = 40000 / len(mnemonics) + const uword amount = iterations * len(mnemonics) + + txt.print("matching ") + txt.print_uw(amount) + txt.print(" mnemonics") + + c64.SETTIM(0,0,0) + + uword total = 0 + repeat iterations { + if lsb(total)==0 + txt.chrout('.') + ubyte idx + for idx in 0 to len(mnemonics)-1 { + uword instr_info = instructions.match(mnemonics[idx]) + ubyte opcode = instructions.opcode(instr_info, modes[idx]) + if_cs + valid++ + total++ + } + } + + uword current_time = c64.RDTIM16() + txt.print("\nvalid: ") + txt.print_uw(valid) + txt.print("\ninvalid: ") + txt.print_uw(amount-valid) + txt.print("\ntotal: ") + txt.print_uw(total) + txt.print("\n\nseconds:") + uword secs = current_time / 60 + current_time = (current_time - secs*60)*1000/60 + txt.print_uw(secs) + txt.chrout('.') + if current_time<10 + txt.chrout('0') + if current_time<100 + txt.chrout('0') + txt.print_uw(current_time) + txt.chrout('\n') + } +} + +instructions { + asmsub match(uword mnemonic_ptr @AY) -> uword @AY { + ; -- input: mnemonic_ptr in AY, output: pointer to instruction info structure or $0000 in AY + %asm {{ + phx + sta P8ZP_SCRATCH_W1 + sty P8ZP_SCRATCH_W1+1 + ldy #0 + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + pha + iny + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + pha + iny + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + pha + iny + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + sta cx16.r4 ; fourth letter in R4 (only exists for the few 4-letter mnemonics) + iny 
+ lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + sta cx16.r5 ; fifth letter in R5 (should always be zero or whitespace for a valid mnemonic) + pla + tay + pla + tax + pla + jsr get_opcode_info + plx + rts + }} + } + + asmsub opcode(uword instr_info_ptr @AY, ubyte addr_mode @X) clobbers(X) -> ubyte @A, ubyte @Pc { + ; -- input: instruction info struct ptr @AY, desired addr_mode @X + ; output: opcode @A, valid @carrybit + %asm {{ + cpy #0 + beq _not_found + sta P8ZP_SCRATCH_W2 + sty P8ZP_SCRATCH_W2+1 + stx cx16.r15 + + ; debug result address + ;sec + ;jsr txt.print_uwhex + ;lda #13 + ;jsr c64.CHROUT + + ldy #0 + lda (P8ZP_SCRATCH_W2),y + beq _multi_addrmodes + iny + lda (P8ZP_SCRATCH_W2),y + cmp cx16.r15 ; check single possible addr.mode + bne _not_found + iny + lda (P8ZP_SCRATCH_W2),y ; get opcode + sec + rts + +_not_found lda #0 + clc + rts + +_multi_addrmodes + ldy cx16.r15 + lda (P8ZP_SCRATCH_W2),y ; check opcode for addr.mode + bne _valid + ; opcode $00 usually means 'invalid' but for "brk" it is actually valid so check for "brk" + ldy #0 + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + cmp #'b' + bne _not_found + iny + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + cmp #'r' + bne _not_found + iny + lda (P8ZP_SCRATCH_W1),y + and #$7f ; lowercase + cmp #'k' + bne _not_found + lda #0 +_valid sec + rts + }} + } + + %asminclude "opcodes.asm", "" +} diff --git a/examples/cx16/assembler/perfecthash.py b/examples/cx16/assembler/perfecthash.py index fcc51aeeb..9f4f27317 100644 --- a/examples/cx16/assembler/perfecthash.py +++ b/examples/cx16/assembler/perfecthash.py @@ -1,180 +1,173 @@ -TOTAL_KEYWORDS = 98 -MIN_WORD_LENGTH = 3 -MAX_WORD_LENGTH = 4 -MIN_HASH_VALUE = 2 -MAX_HASH_VALUE = 134 - - -def hash(string: str, length: int) -> int: - asso_values = [ - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, - 135, 135, 135, 135, 135, 135, 135, 135, 135, 135, - 135, 
# Tables produced by gperf for the 65C02 mnemonic list (see the Makefile rule
# that pipes `gen_opcodes.py --mnemlist` into gperf).  They are kept as
# module-level tuples so they are built once instead of on every lookup.

# All 98 recognised mnemonics, in the order gperf emitted them.
_WORDLIST = (
    "PHP", "ROR", "STP", "RTS", "RTI", "TXS", "PHY", "TRB", "EOR", "STY",
    "PHX", "TSB", "TAY", "STX", "BRK", "LSR", "TAX", "TSX", "PHA", "PLP",
    "BRA", "STA", "ROL", "BCS", "SEI", "TXA", "LDY", "PLY", "INY", "LDX",
    "PLX", "NOP", "INX", "CLI", "ASL", "SBC", "BMI", "LDA", "PLA", "ORA",
    "BNE", "ADC", "BBS7", "BBR7", "CMP", "CPY", "INC", "SEC", "BCC", "CPX",
    "BPL", "DEY", "TYA", "CLV", "DEX", "CLC", "BBS6", "BBR6", "BBS5", "BBR5",
    "SMB7", "RMB7", "STZ", "SED", "BBS4", "BBR4", "DEC", "BBS3", "BBR3", "BBS2",
    "BBR2", "AND", "CLD", "BBS1", "BBR1", "BEQ", "SMB6", "RMB6", "SMB5", "RMB5",
    "BBS0", "BBR0", "BVS", "WAI", "SMB4", "RMB4", "JSR", "SMB3", "RMB3", "SMB2",
    "RMB2", "BIT", "SMB1", "RMB1", "SMB0", "RMB0", "BVC", "JMP",
)

# Maps a hash value (0..125) to an index into _WORDLIST; -1 marks an unused slot.
_LOOKUP = (
    -1,  0, -1,  1,  2,  3, -1,  4,  5,  6,  7,  8,  9, 10,
    11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, 26, 27, -1, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37,
    38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, -1, 50,
    51, 52, -1, 53, 54, 55, -1, 56, 57, 58, 59, -1, 60, 61,
    62, 63, 64, 65, 66, 67, 68, 69, 70, 71, -1, 72, 73, 74,
    75, 76, 77, 78, 79, 80, 81, 82, -1, 83, 84, 85, 86, 87,
    88, 89, 90, 91, -1, -1, 92, 93, -1, -1, -1, -1, -1, 94,
    95, -1, -1, -1, -1, 96, -1, -1, -1, -1, -1, -1, -1, 97,
)

# Per-character weights, indexed by 7-bit character code.  The table has 129
# entries (not 128) because the second character is looked up at ord(c) + 1.
# A weight of 126 exceeds _MAX_HASH_VALUE and so rejects the character outright.
_ASSO_VALUES = (
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126,  73,  66,
     61,  59,  56,  49,  47,  30, 126, 126, 126, 126,
    126, 126, 126, 126, 126,  10,   3,  13,  23,   9,
     25, 126, 126,   1,  90,   7,  12,  22,  35,  23,
      0,  28,   1,   0,   4,   4,  12,  88,   6,   4,
     33, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126, 126,
    126, 126, 126, 126, 126, 126, 126, 126, 126,
)

# Largest hash value that can belong to a real mnemonic (len(_LOOKUP) - 1).
_MAX_HASH_VALUE = 125


def _perfect_hash(string: str) -> int:
    """Return the gperf hash of a 3- or 4-character ASCII *string*.

    The hash sums the weights of the first three characters (the second one
    offset by +1) and of the last character; for 3-letter words the third
    character is therefore counted twice.
    """
    return (
        _ASSO_VALUES[ord(string[2])]
        + _ASSO_VALUES[ord(string[1]) + 1]
        + _ASSO_VALUES[ord(string[0])]
        + _ASSO_VALUES[ord(string[-1])]
    )


def in_word_set(string: str) -> bool:
    """Return True if *string* is one of the known upper-case mnemonics.

    The lookup mirrors gperf's generated C code: hash the candidate, map the
    hash through ``_LOOKUP`` to a word index, then confirm with one string
    comparison.  Matching is case-sensitive.

    Args:
        string: Candidate mnemonic, e.g. ``"LDA"`` or ``"BBS7"``.

    Returns:
        True for an exact match with a table entry, False otherwise.  Inputs
        of the wrong length or containing non-ASCII characters are rejected
        without raising.
    """
    # Every mnemonic is 3 or 4 characters long.  The weight table only covers
    # 7-bit codes, so anything non-ASCII can never match and would otherwise
    # index past the end of _ASSO_VALUES.
    if not 3 <= len(string) <= 4 or not string.isascii():
        return False

    key = _perfect_hash(string)
    if key > _MAX_HASH_VALUE:
        return False

    index = _LOOKUP[key]
    # The hash is only perfect for members of the set, so a final comparison
    # is required to reject non-members that land on an occupied slot.
    return index >= 0 and _WORDLIST[index] == string


# Reference: the gperf-generated C lookup that in_word_set() mirrors.
#   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
#     {
#       register unsigned int key = hash (str, len);
#
#       if (key <= MAX_HASH_VALUE)
#         {
#           register int index = lookup[key];
#
#           if (index >= 0)
#             {
#               register const char *s = wordlist[index];
#
#               if (*str == *s && !strcmp (str + 1, s + 1))
#                 return s;
#             }
#         }
#     }
#   return 0;
# }