Use an improved hash function for symbol tables.

The hash algorithm now rotates the running sum left by one bit at each step, which should improve the quality of the hashes and reduce the number of collisions. However, the more important change for performance is probably that the modulo is now computed by repeated subtraction rather than by calling a slow library function.
This commit is contained in:
Stephen Heumann 2023-03-06 21:54:14 -06:00
parent 3406dbd3ae
commit 3ac55a64bf
2 changed files with 26 additions and 20 deletions

View File

@ -44,37 +44,43 @@ lb1 lda [fromPtr],Y
Hash start cc Hash start cc
hashSize equ 876 # hash buckets - 1 hashSize equ 876 # hash buckets - 1
sum equ 0 hash disp equ 0 disp into hash table
length equ 2 length of string length equ 2 length of string
subroutine (4:sPtr),4 subroutine (4:sPtr),4
stz sum default to bucket 0
lda [sPtr] set the length of the string lda [sPtr] set the length of the string
tax
and #$00FF and #$00FF
sta length sta length
ldy #1 start with char 1 ldy #1 start with char 1
lda [sPtr] if 1st char is '~', start with char 6 txa if 1st char is '~', start with char 6
and #$FF00 and #$FF00
cmp #'~'*256 cmp #'~'*256
bne lb1 bne lb0
ldy #6 ldy #6
lb1 lda [sPtr],Y get the value to add in lb0 lda #0 initial value is 0
and #$3F3F bra lb2 while there are at least 2 chars left
cpy length if there is only 1 char left then lb1 asl a rotate sum left one bit
bne lb2 adc [sPtr],Y add in next two bytes
and #$00FF and out the high byte iny advance two chars
lb2 clc add it to the sum
adc sum
sta sum
iny next char
iny iny
cpy length lb2 cpy length
ble lb1 blt lb1
mod2 sum,#hashSize+1 return disp bne lb3 if there is 1 char left then
asl sum asl a rotate sum left one bit
asl sum sta disp
lda [sPtr],Y
and #$00FF and out the high byte
adc disp add last byte to the sum
sec
lb3 sbc #hashSize+1 disp := (sum mod (hashSize+1)) << 2
bcs lb3
adc #hashSize+1
asl a
asl a
sta disp
return 2:sum return 2:disp return disp
end end

View File

@ -18,7 +18,7 @@ uses CCommon, MM, Scanner, Symbol, CGI;
{$segment 'HEADER'} {$segment 'HEADER'}
const const
symFileVersion = 38; {version number of .sym file format} symFileVersion = 39; {version number of .sym file format}
var var
inhibitHeader: boolean; {should .sym includes be blocked?} inhibitHeader: boolean; {should .sym includes be blocked?}