From 3ac55a64bfc3a4d43cb9496bddc57e26d542c2c7 Mon Sep 17 00:00:00 2001
From: Stephen Heumann
Date: Mon, 6 Mar 2023 21:54:14 -0600
Subject: [PATCH] Use an improved hash function for symbol tables.

The hash algorithm has been modified to include a rotate at each step. This
should improve the quality of hashes and reduce the number of collisions.

However, probably the more important change for performance is to do the
modulo computation by repeated subtraction rather than by calling a slow
library function.
---
 CCommon.asm | 44 +++++++++++++++++++++++++-------------------
 Header.pas  |  2 +-
 2 files changed, 26 insertions(+), 20 deletions(-)

diff --git a/CCommon.asm b/CCommon.asm
index 50d8142..d0d7d38 100644
--- a/CCommon.asm
+++ b/CCommon.asm
@@ -44,37 +44,43 @@ lb1      lda   [fromPtr],Y
 
 Hash     start cc
 hashSize equ   876                      # hash buckets - 1
-sum      equ   0                        hash
+disp     equ   0                        disp into hash table
 length   equ   2                        length of string
 
          subroutine (4:sPtr),4
 
-         stz   sum                      default to bucket 0
          lda   [sPtr]                   set the length of the string
+         tax                            save word for the '~' test
          and   #$00FF
          sta   length
          ldy   #1                       start with char 1
-         lda   [sPtr]                   if 1st char is '~', start with char 6
+         txa                            if 1st char is '~', start with char 6
          and   #$FF00
          cmp   #'~'*256
-         bne   lb1
+         bne   lb0
          ldy   #6
-lb1      lda   [sPtr],Y                 get the value to add in
-         and   #$3F3F
-         cpy   length                   if there is only 1 char left then
-         bne   lb2
-         and   #$00FF                     and out the high byte
-lb2      clc                            add it to the sum
-         adc   sum
-         sta   sum
-         iny                            next char
+lb0      lda   #0                       initial value is 0
+         bra   lb2                      while there are at least 2 chars left
+lb1      asl   a                          rotate sum left one bit
+         adc   [sPtr],Y                   add next two bytes + shifted-out bit
+         iny                              advance two chars
          iny
-         cpy   length
-         ble   lb1
-         mod2  sum,#hashSize+1          return disp
-         asl   sum
-         asl   sum
+lb2      cpy   length
+         blt   lb1                      loop while Y is below length
+         bne   lb3                      if there is 1 char left then
+         asl   a                          rotate sum left one bit
+         sta   disp                       save it (carry = shifted-out bit)
+         lda   [sPtr],Y                   get the last char
+         and   #$00FF                     and out the high byte
+         adc   disp                       add last byte to the sum
+         sec                              no borrow into the first sbc
+lb3      sbc   #hashSize+1              disp := (sum mod (hashSize+1)) << 2
+         bcs   lb3                        subtract until it borrows
+         adc   #hashSize+1                add back last one (carry clear)
+         asl   a                          multiply by 4
+         asl   a
+         sta   disp
 
-         return 2:sum
+         return 2:disp                  return disp
          end
 
diff --git a/Header.pas b/Header.pas
index ecc81fb..ff91a57 100644
--- a/Header.pas
+++ b/Header.pas
@@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
 {$segment 'HEADER'}
 
 const
-   symFileVersion = 38;                 {version number of .sym file format}
+   symFileVersion = 39;                 {version number of .sym file format}
 
 var
    inhibitHeader: boolean;              {should .sym includes be blocked?}