diff --git a/compiler/res/prog8lib/string.p8 b/compiler/res/prog8lib/string.p8 index 826ad5fa7..9e673d645 100644 --- a/compiler/res/prog8lib/string.p8 +++ b/compiler/res/prog8lib/string.p8 @@ -365,8 +365,8 @@ fail clc ; yes, no match found, return with c=0 asmsub hash(str string @R0) -> ubyte @A { ; experimental 8 bit hashing function. - ; hash(-1)=179; hash(i) = ROL hash(i-1) XOR string[i] - ; (experimental because the quality of the resulting hash value still has to be determined) + ; hash(-1)=179; clear carry; hash(i) = ROL hash(i-1) XOR string[i] + ; On the English word list in /usr/share/dict/words it seems to have a pretty even distribution %asm {{ lda #179 sta P8ZP_SCRATCH_REG diff --git a/compiler/res/prog8lib/virtual/string.p8 b/compiler/res/prog8lib/virtual/string.p8 index a57e722c7..6284cb780 100644 --- a/compiler/res/prog8lib/virtual/string.p8 +++ b/compiler/res/prog8lib/virtual/string.p8 @@ -164,6 +164,7 @@ string { ; (experimental because the quality of the resulting hash value still has to be determined) ubyte hashcode = 179 ubyte ix + sys.clear_carry() repeat { if st[ix] { rol(hashcode) diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index 4653cf645..44e61a3b5 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -278,7 +278,9 @@ Provides string manipulation routines. ``hash (string) -> ubyte`` Returns a simple 8 bit hash value for the given string. - *Experimental.* The quality of the resulting hash value still has to be determined. + The formula is: hash(-1)=179; clear carry; hash(i) = ROL hash(i-1) XOR string[i] + (where ROL is the cpu ROL instruction) + On the English word list in /usr/share/dict/words it seems to have a pretty even distribution. 
floats diff --git a/docs/source/todo.rst b/docs/source/todo.rst index b79ffce3f..7a46147f9 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -11,7 +11,7 @@ Need help with ^^^^^^^^^^^^^^ - getting the IR in shape for code generation - atari target: more details details about the machine, fixing library routines. I have no clue whatsoever. -- see the :ref:`portingguide` for details on what information is needed. + See the :ref:`portingguide` for details on what information is needed. Future Things and Ideas @@ -35,12 +35,12 @@ Compiler: - (need separate step in codegen and IR to write the "golden" variables) - do we need (array)variable alignment tag instead of block alignment tag? You want to align the data, not the code in the block? -- ir: block alignment doesn't translate well to variables in the block (the actual stuff that needs to be aligned in memory) but: need variable alignment tag instead of block alignment tag, really +- ir: related to the one above: block alignment doesn't translate well to variables in the block (the actual stuff that needs to be aligned in memory) but: need variable alignment tag instead of block alignment tag, really - ir: idea: (but LLVM IR simply keeps the variables, so not a good idea then?...): replace all scalar variables by an allocated register. Keep a table of the variable to register mapping (including the datatype) global initialization values are simply a list of LOAD instructions. Variables replaced include all subroutine parameters! So the only variables that remain as variables are arrays and strings. - ir: add more optimizations in IRPeepholeOptimizer -- ir: for expressions with array indexes that occur multiple times, can we avoid loading them into new virtualregs everytime and just reuse a single virtualreg as indexer? 
(simple form of common subexpression elimination) +- ir: for expressions with array indexes that occur multiple times, can we avoid loading them into new virtualregs every time and just reuse a single virtualreg as indexer? (this is a form of common subexpression elimination) - ir: the @split arrays are currently also split in _lsb/_msb arrays in the IR, and operations take multiple (byte) instructions that may lead to verbose and slow operation and machine code generation down the line. maybe another representation is needed once actual codegeneration is done from the IR...? - PtAst/IR: more complex common subexpression eliminations diff --git a/examples/test.p8 b/examples/test.p8 index 5732c61af..b32cd75de 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,26 +1,17 @@ %import textio -%import floats +%import string %zeropage basicsafe main { sub start() { - ubyte b = 4 - ubyte b2 = 4 - uword w = 4 - uword w2 = 4 - float c - %asm {{ - nop - }} - c += b*b - floats.print_f(c) - txt.nl() - c=0 - %asm {{ - nop - }} - c += w*w - floats.print_f(c) - txt.nl() + str[] names = ["a", "aa", "harry", "the Quick Brown Fox jumps Over the LAZY dog!"] + + uword name + for name in names { + txt.print_ub(string.hash(name)) + txt.spc() + txt.print(name) + txt.nl() + } } }