From fbe231793b8d851ae41be2aaf320808fc02b7468 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sat, 16 Mar 2024 00:45:25 +0100 Subject: [PATCH] optimized and added "streaming" crc32 and crc16 routines to math module. Return value is put in different register now! r14+r15 instead of r0+r1! --- compiler/res/prog8lib/math.p8 | 160 ++++++++++++++++++++------ compiler/res/prog8lib/virtual/math.p8 | 106 ++++++++++++----- docs/source/libraries.rst | 14 ++- examples/cx16/diskspeed.p8 | 10 +- examples/test.p8 | 48 +++++--- 5 files changed, 248 insertions(+), 90 deletions(-) diff --git a/compiler/res/prog8lib/math.p8 b/compiler/res/prog8lib/math.p8 index 71073f777..b472d8897 100644 --- a/compiler/res/prog8lib/math.p8 +++ b/compiler/res/prog8lib/math.p8 @@ -504,47 +504,137 @@ log2_tab sub crc16(uword data, uword length) -> uword { ; calculates the CRC16 (XMODEM) checksum of the buffer. - cx16.r1 = data ; make sure pointer is in zp (on cx16) - cx16.r0 = 0 ; the crc value - repeat length { - cx16.r0H ^= @(cx16.r1) - repeat 8 { - if cx16.r0H & $80 !=0 { - cx16.r0 <<= 1 - cx16.r0 ^= $1021 - } - else - cx16.r0<<=1 - } - cx16.r1++ + ; There are also "streaming" crc16_start/update/end routines below, that allow you to calculate crc16 for data that doesn't fit in a single memory block. + crc16_start() + cx16.r13 = data + cx16.r14 = data+length + while cx16.r13!=cx16.r14 { + crc16_update(@(cx16.r13)) + cx16.r13++ } - return cx16.r0 + return crc16_end() + } + + sub crc16_start() { + ; start the "streaming" crc16 + ; note: tracks the crc16 checksum in cx16.r15! + ; if your code uses that, it must save/restore it before calling this routine + cx16.r15 = 0 + } + + asmsub crc16_update(ubyte value @A) { + ; update the "streaming" crc16 with next byte value + ; note: tracks the crc16 checksum in cx16.r15! 
+ ; if your code uses that, it must save/restore it before calling this routine + %asm {{ + eor cx16.r15H + sta cx16.r15H + ldy #8 +- lda cx16.r15H + asl cx16.r15L + rol cx16.r15H + and #$80 + beq + + lda cx16.r15H + eor #$10 + sta cx16.r15H + lda cx16.r15L + eor #$21 + sta cx16.r15L ++ dey + bne - + rts + }} +; original prog8 code was: +; cx16.r15H ^= value +; repeat 8 { +; if cx16.r15H & $80 !=0 { +; cx16.r15 <<=1 +; cx16.r15 ^= $1021 +; } else +; cx16.r15<<=1 +; } + } + + sub crc16_end() -> uword { + ; finalize the "streaming" crc16, returns resulting crc16 value + return cx16.r15 } sub crc32(uword data, uword length) { ; Calculates the CRC-32 (POSIX) checksum of the buffer. ; because prog8 doesn't have 32 bits integers, we have to split up the calculation over 2 words. - ; result stored in cx16.r0 (low word) and cx16.r1 (high word) - cx16.r2 = data ; make sure pointer is in zp (on cx16) - cx16.r1 = 0 - cx16.r0 = 0 - repeat length { - cx16.r1H ^= @(cx16.r2) - repeat 8 { - if cx16.r1H & $80 !=0 { - cx16.r0 <<= 1 - rol(cx16.r1) - cx16.r1 ^= $04c1 - cx16.r0 ^= $1db7 - } - else { - cx16.r0 <<= 1 - rol(cx16.r1) - } - } - cx16.r2++ + ; result stored in cx16.r14 (low word) and cx16.r15 (high word) + ; There are also "streaming" crc32_start/update/end routines below, that allow you to calculate crc32 for data that doesn't fit in a single memory block. + crc32_start() + cx16.r12 = data + cx16.r13 = data+length + while cx16.r12!=cx16.r13 { + crc32_update(@(cx16.r12)) + cx16.r12++ } - cx16.r1 ^= $ffff - cx16.r0 ^= $ffff + crc32_end() + } + + sub crc32_start() { + ; start the "streaming" crc32 + ; note: tracks the crc32 checksum in cx16.r14 and cx16.r15! + ; if your code uses these, it must save/restore them before calling this routine + cx16.r14 = cx16.r15 = 0 + } + + asmsub crc32_update(ubyte value @A) { + ; update the "streaming" crc32 with next byte value + ; note: tracks the crc32 checksum in cx16.r14 and cx16.r15! 
+ ; if your code uses these, it must save/restore them before calling this routine + %asm {{ + eor cx16.r15H + sta cx16.r15H + ldy #8 +- lda cx16.r15H + asl cx16.r14L + rol cx16.r14H + rol cx16.r15L + rol cx16.r15H + and #$80 + beq + + lda cx16.r15H + eor #$04 + sta cx16.r15H + lda cx16.r15L + eor #$c1 + sta cx16.r15L + lda cx16.r14H + eor #$1d + sta cx16.r14H + lda cx16.r14L + eor #$b7 + sta cx16.r14L ++ dey + bne - + rts + }} +; original prog8 code: +; cx16.r15H ^= value +; repeat 8 { +; if cx16.r15H & $80 !=0 { +; cx16.r14 <<= 1 +; rol(cx16.r15) +; cx16.r15 ^= $04c1 +; cx16.r14 ^= $1db7 +; } +; else { +; cx16.r14 <<= 1 +; rol(cx16.r15) +; } +; } + + } + + sub crc32_end() { + ; finalize the "streaming" crc32 + ; result stored in cx16.r14 (low word) and cx16.r15 (high word) + cx16.r15 ^= $ffff + cx16.r14 ^= $ffff } } diff --git a/compiler/res/prog8lib/virtual/math.p8 b/compiler/res/prog8lib/virtual/math.p8 index ff70e2477..e7087710b 100644 --- a/compiler/res/prog8lib/virtual/math.p8 +++ b/compiler/res/prog8lib/virtual/math.p8 @@ -313,43 +313,87 @@ math { sub crc16(uword data, uword length) -> uword { ; calculates the CRC16 (XMODEM) checksum of the buffer. - cx16.r0 = 0 ; the crc value - repeat length { - cx16.r0H ^= @(data) - repeat 8 { - if cx16.r0H & $80 !=0 - cx16.r0 = (cx16.r0<<1)^$1021 - else - cx16.r0<<=1 - } - data++ + ; There are also "streaming" crc16_start/update/end routines below, that allow you to calculate crc16 for data that doesn't fit in a single memory block. + crc16_start() + cx16.r13 = data + cx16.r14 = data+length + while cx16.r13!=cx16.r14 { + crc16_update(@(cx16.r13)) + cx16.r13++ } - return cx16.r0 + return crc16_end() + } + + sub crc16_start() { + ; start the "streaming" crc16 + ; note: tracks the crc16 checksum in cx16.r15! 
+ ; if your code uses that, it must save/restore it before calling this routine + cx16.r15 = 0 + } + + sub crc16_update(ubyte value) { + ; update the "streaming" crc16 with next byte value + ; note: tracks the crc16 checksum in cx16.r15! + ; if your code uses that, it must save/restore it before calling this routine + cx16.r15H ^= value + repeat 8 { + if cx16.r15H & $80 !=0 + cx16.r15 = (cx16.r15<<1)^$1021 + else + cx16.r15<<=1 + } + } + + sub crc16_end() -> uword { + ; finalize the "streaming" crc16, returns resulting crc16 value + return cx16.r15 } sub crc32(uword data, uword length) { ; Calculates the CRC-32 (POSIX) checksum of the buffer. ; because prog8 doesn't have 32 bits integers, we have to split up the calculation over 2 words. - ; result stored in cx16.r0 (low word) and cx16.r1 (high word) - cx16.r1 = 0 - cx16.r0 = 0 - repeat length { - cx16.r1H ^= @(data) - repeat 8 { - if cx16.r1H & $80 !=0 { - cx16.r0 <<= 1 - rol(cx16.r1) - cx16.r1 ^= $04c1 - cx16.r0 ^= $1db7 - } - else { - cx16.r0 <<= 1 - rol(cx16.r1) - } - } - data++ + ; result stored in cx16.r14 (low word) and cx16.r15 (high word) + ; There are also "streaming" crc32_start/update/end routines below, that allow you to calculate crc32 for data that doesn't fit in a single memory block. + crc32_start() + cx16.r12 = data + cx16.r13 = data+length + while cx16.r12!=cx16.r13 { + crc32_update(@(cx16.r12)) + cx16.r12++ } - cx16.r1 ^= $ffff - cx16.r0 ^= $ffff + crc32_end() + } + + sub crc32_start() { + ; start the "streaming" crc32 + ; note: tracks the crc32 checksum in cx16.r14 and cx16.r15! + ; if your code uses these, it must save/restore them before calling this routine + cx16.r14 = cx16.r15 = 0 + } + + sub crc32_update(ubyte value) { + ; update the "streaming" crc32 with next byte value + ; note: tracks the crc32 checksum in cx16.r14 and cx16.r15! 
+ ; if your code uses these, it must save/restore them before calling this routine + cx16.r15H ^= value + repeat 8 { + if cx16.r15H & $80 !=0 { + cx16.r14 <<= 1 + rol(cx16.r15) + cx16.r15 ^= $04c1 + cx16.r14 ^= $1db7 + } + else { + cx16.r14 <<= 1 + rol(cx16.r15) + } + } + } + + sub crc32_end() { + ; finalize the "streaming" crc32 + ; result stored in cx16.r14 (low word) and cx16.r15 (high word) + cx16.r15 ^= $ffff + cx16.r14 ^= $ffff } } diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index 4342b83df..b05a5fd0c 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -532,12 +532,22 @@ but perhaps the provided ones can be of service too. ``crc16 (uword data, uword length) -> uword`` Returns a CRC-16 (XMODEM) checksum over the given data buffer. Note: on the Commander X16, there is a CRC-16 routine in the kernal: cx16.memory_crc(). - That one is faster, but yields different results. It is unclear what flavour of crc it is calculating. + That one is faster, but yields different results. It is unclear to me what flavour of crc it is calculating. + +``crc16_start() / crc16_update(ubyte value) / crc16_end() -> uword`` + "streaming" crc16 calculation routines, when the data doesn't fit in a single buffer. + Tracks the crc16 checksum in cx16.r15! If your code uses that, it must save/restore it before calling this routine! + Call the start() routine first, feed it bytes with the update() routine, finalize with calling the end() routine which returns the crc16 value. ``crc32 (uword data, uword length)`` Calculates a CRC-32 (POSIX) checksum over the given data buffer. - The 32 bits result is stored in cx16.r0 (low word) and cx16.r1 (high word). + The 32 bits result is stored in cx16.r14 (low word) and cx16.r15 (high word). +``crc32_start() / crc32_update(ubyte value) / crc32_end()`` + "streaming" crc32 calculation routines, when the data doesn't fit in a single buffer. + Tracks the crc32 checksum in cx16.r14 and cx16.r15! 
If your code uses these, it must save/restore them before calling this routine! + Call the start() routine first, feed it bytes with the update() routine, finalize with calling the end() routine. + The 32 bits result is stored in cx16.r14 (low word) and cx16.r15 (high word). cx16logo -------- diff --git a/examples/cx16/diskspeed.p8 b/examples/cx16/diskspeed.p8 index 1d9d13b02..9be27dccc 100644 --- a/examples/cx16/diskspeed.p8 +++ b/examples/cx16/diskspeed.p8 @@ -24,8 +24,8 @@ main { large[cx16.r0] = math.rnd() } math.crc32(large, 20000) - uword crc32_l = cx16.r0 - uword crc32_h = cx16.r1 + uword crc32_l = cx16.r14 + uword crc32_h = cx16.r15 } txt.print("\n\x12diskio.save()\x92 writing 10*20kb=200kb total") @@ -168,9 +168,9 @@ main { sub compare_crc32(uword ptr, uword size, uword crc32_low, uword crc32_high) { math.crc32(ptr, size) - if cx16.r0!=crc32_low or cx16.r1!=crc32_high { - txt.print_uwhex(cx16.r1, true) - txt.print_uwhex(cx16.r0, false) + if cx16.r14!=crc32_low or cx16.r15!=crc32_high { + txt.print_uwhex(cx16.r15, true) + txt.print_uwhex(cx16.r14, false) txt.nl() txt.print_uwhex(crc32_high, true) txt.print_uwhex(crc32_low, false) diff --git a/examples/test.p8 b/examples/test.p8 index 8f4cd8b0e..bd656ca3e 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,27 +1,41 @@ +%import math %import textio %zeropage basicsafe %option no_sysinit +; $029f + main { sub start() { - cx16.reset_system() - repeat { - for cx16.r0L in 0 to 255 { - cx16.set_led_brightness(cx16.r0L) - delay() - } - for cx16.r0L in 255 downto 0 { - cx16.set_led_brightness(cx16.r0L) - delay() - } - } - } + txt.print("crc16\n") + txt.print_uwhex(math.crc16($0800, 32768), true) + txt.nl() - sub delay() { - repeat 2000 { - %asm {{ - nop - }} + cx16.r15 = 0 + math.crc16_start() + for cx16.r9 in $0800 to $0800+32768-1 { + math.crc16_update(@(cx16.r9)) } + txt.print_uwhex(math.crc16_end(), true) + txt.nl() + + txt.print("crc32\n") + cx16.r0 = cx16.r1 = 0 + math.crc32($0800, 32768) + 
txt.print_uwhex(cx16.r15, true) + txt.print_uwhex(cx16.r14, false) + txt.nl() + + cx16.r0 = cx16.r1 = 0 + math.crc32_start() + for cx16.r9 in $0800 to $0800+32768-1 { + math.crc32_update(@(cx16.r9)) + } + math.crc32_end() + txt.print_uwhex(cx16.r15, true) + txt.print_uwhex(cx16.r14, false) + txt.nl() } } + +