added sorting library for target virtual

added sorting routines that sort a values array together with the keys array; optimized gnomesort a little
2026-04-21 17:16:33 +00:00 · 2025-06-07 00:51:51 +02:00
parent bebe60b687
commit e8795859c5
7 changed files with 277 additions and 68 deletions
@@ -36,6 +36,12 @@
 ; - IMPORTANT:  if you add the same subroutine multiple times, IT CANNOT DEPEND ON ANY LOCAL VARIABLES OR R0-R15 TO KEEP STATE. NOT EVEN REPEAT LOOP COUNTERS.
 ;   Those are all shared in the different tasks! You HAVE to use a mechanism around the userdata value (pointer?) to keep separate state elsewhere!
 ; - IMPORTANT:  ``defer`` cannot be used inside a coroutine that is reused for multiple tasks!!!
+;
+; TIP: HOW TO WAIT without BLOCKING other coroutines?
+; Make sure you call yield() in the waiting loop, for example:
+;            uword timer = cbm.RDTIM16() + 60
+;            while cbm.RDTIM16() != timer
+;                void coroutines.yield()

 coroutines {
    %option ignore_unused
@@ -43,49 +43,72 @@ _done
        }}
    }

-    /*
-    prog8 source code for the above routine:
-
-    sub gnomesort_ub(uword @requirezp values, ubyte num_elements) {
+    sub gnomesort_by_ub(uword @requirezp uw_keys, uword values, ubyte num_elements) {
+        ; sorts the 'values' array (no-split array of words) according to the 'uw_keys' array of unsigned bytes (which also gets sorted, of course).
        ubyte @zp pos=1
        while pos != num_elements {
-            if values[pos]>=values[pos-1]
+            if uw_keys[pos]>=uw_keys[pos-1]
                pos++
            else {
                ; swap elements
-                cx16.r0L = values[pos-1]
-                values[pos-1] = values[pos]
-                values[pos] = cx16.r0L
+                cx16.r0L = uw_keys[pos-1]
+                uw_keys[pos-1] = uw_keys[pos]
+                uw_keys[pos] = cx16.r0L
+                uword @requirezp vptr = values + pos*$0002 -2
+                cx16.r0 = peekw(vptr)
+                pokew(vptr, peekw(vptr+2))
+                pokew(vptr+2, cx16.r0)
+
                pos--
                if_z
                    pos++
            }
        }
    }
-    */

-    sub gnomesort_uw(uword values, ubyte num_elements) {
-        ; When written in asm this is 10-20% faster, but unreadable. Not worth it.
-        ; Also, sorting just an array of word numbers is very seldomly used, most often you
-        ; need to sort other things associated with it as well and that is not done here anyway,
-        ; so requires a custom user coded sorting routine anyway.
-        ubyte @zp pos = 1
-        uword @requirezp ptr = values+2
+    sub gnomesort_uw(uword @requirezp values, ubyte num_elements) {
+        ; Sorts the values array (no-split unsigned words).
+        ; Max number of elements is 128. Clobbers R0 and R1.
+        ubyte @zp pos=2
+        num_elements *= 2
        while pos != num_elements {
-            cx16.r0 = peekw(ptr-2)
-            cx16.r1 = peekw(ptr)
-            if cx16.r0<=cx16.r1 {
-                pos++
-                ptr+=2
-            }
+            cx16.r1L = pos-2
+            if peekw(values+pos) >= peekw(values + cx16.r1L)
+                pos += 2
            else {
                ; swap elements
-                pokew(ptr-2, cx16.r1)
-                pokew(ptr, cx16.r0)
-                if pos>1 {
-                    pos--
-                    ptr-=2
-                }
+                cx16.r0 = peekw(values + cx16.r1L)
+                pokew(values + cx16.r1L, peekw(values + pos))
+                pokew(values + pos, cx16.r0)
+                pos-=2
+                if_z
+                    pos+=2
+            }
+        }
+    }
+
+    sub gnomesort_by_uw(uword @requirezp uw_keys, uword wordvalues, ubyte num_elements) {
+        ; Sorts the 'wordvalues' array according to the 'uw_keys' array (which also gets sorted, of course).
+        ; Both arrays should be no-split arrays of words. uw_keys are unsigned.
+        ; Max number of elements is 128. Clobbers R0 and R1.
+        ubyte @zp pos=2
+        num_elements *= 2
+        while pos != num_elements {
+            cx16.r1L = pos-2
+            if peekw(uw_keys+pos) >= peekw(uw_keys + cx16.r1L)
+                pos += 2
+            else {
+                ; swap elements
+                cx16.r0 = peekw(uw_keys + cx16.r1L)
+                pokew(uw_keys + cx16.r1L, peekw(uw_keys+ pos))
+                pokew(uw_keys + pos, cx16.r0)
+                cx16.r0 = peekw(wordvalues + cx16.r1L)
+                pokew(wordvalues + cx16.r1L, peekw(wordvalues + pos))
+                pokew(wordvalues + pos, cx16.r0)
+
+                pos-=2
+                if_z
+                    pos+=2
            }
        }
    }
@@ -93,6 +116,7 @@ _done
    ; gnomesort_pointers is not worth it over shellsort_pointers.

    sub shellsort_ub(uword @requirezp values, ubyte num_elements) {
+        ; sorts the values array (unsigned bytes).
        num_elements--
        ubyte @zp gap
        for gap in [132, 57, 23, 10, 4, 1] {
@@ -115,6 +139,7 @@ _done
    }

    sub shellsort_uw(uword @requirezp values, ubyte num_elements) {
+        ; sorts the values array (no-split unsigned words).
        num_elements--
        ubyte gap
        for gap in [132, 57, 23, 10, 4, 1] {
@@ -124,13 +149,65 @@ _done
                ubyte @zp j = i
                ubyte @zp k = j-gap
                while j>=gap {
-                    uword @zp v = peekw(values+k*2)
+                    uword @zp v = peekw(values+k*$0002)
                    if v <= temp break
-                    pokew(values+j*2, v)
+                    pokew(values+j*$0002, v)
                    j = k
                    k -= gap
                }
-                pokew(values+j*2, temp)
+                pokew(values+j*$0002, temp)
+            }
+        }
+    }
+
+    sub shellsort_by_ub(uword @requirezp ub_keys, uword @requirezp wordvalues, ubyte num_elements) {
+        ; sorts the 'wordvalues' array (no-split array of words) according to the 'ub_keys' array (which also gets sorted, of course).
+        num_elements--
+        ubyte @zp gap
+        for gap in [132, 57, 23, 10, 4, 1] {
+            ubyte i
+            for i in gap to num_elements {
+                ubyte @zp temp = ub_keys[i]
+                uword temp_wv = peekw(wordvalues + i*$0002)
+                ubyte @zp j = i
+                ubyte @zp k = j-gap
+                repeat {
+                    ubyte @zp v = ub_keys[k]
+                    if v <= temp break
+                    if j < gap break
+                    ub_keys[j] = v
+                    pokew(wordvalues + j*$0002, peekw(wordvalues + k*$0002))
+                    j = k
+                    k -= gap
+                }
+                ub_keys[j] = temp
+                pokew(wordvalues + j*$0002, temp_wv)
+            }
+        }
+    }
+
+    sub shellsort_by_uw(uword @requirezp uw_keys, uword @requirezp wordvalues, ubyte num_elements) {
+        ; sorts the 'wordvalues' array according to the 'uw_keys' array (which also gets sorted, of course).
+        ; both arrays should be no-split arrays of words. uw_keys are unsigned.
+        num_elements--
+        ubyte gap
+        for gap in [132, 57, 23, 10, 4, 1] {
+            ubyte i
+            for i in gap to num_elements {
+                uword @zp temp = peekw(uw_keys+i*$0002)
+                uword temp_wv = peekw(wordvalues + i*$0002)
+                ubyte @zp j = i
+                ubyte @zp k = j-gap
+                while j>=gap {
+                    uword @zp v = peekw(uw_keys+k*2)
+                    if v <= temp break
+                    pokew(uw_keys+j*2, v)
+                    pokew(wordvalues + j*$0002, peekw(wordvalues + k*$0002))
+                    j = k
+                    k -= gap
+                }
+                pokew(uw_keys+j*2, temp)
+                pokew(wordvalues + j*$0002, temp_wv)
            }
        }
    }
@@ -147,14 +224,14 @@ _done
                ubyte @zp j = i
                ubyte @zp k = j-gap
                while j>=gap {
-                    cx16.r0 = peekw(pointers+k*2)
+                    cx16.r0 = peekw(pointers+k*$0002)
                    void call(comparefunc)
                    if_cs break
-                    pokew(pointers+j*2, cx16.r0)
+                    pokew(pointers+j*$0002, cx16.r0)
                    j = k
                    k -= gap
                }
-                pokew(pointers+j*2, cx16.r1)
+                pokew(pointers+j*$0002, cx16.r1)
            }
        }
    }
@@ -0,0 +1,103 @@
+; **experimental** data sorting routines, API subject to change!!
+
+; NOTE: gnomesort is not implemented here, just use shellsort.
+
+sorting {
+    %option ignore_unused
+
+    sub shellsort_ub(uword @requirezp values, ubyte num_elements) {     ; sorts the 'values' array (unsigned bytes) in place, ascending.
+        num_elements--      ; from here on: index of the last element. NOTE(review): 0 elements would presumably wrap to 255 here - confirm callers never pass an empty array
+        ubyte @zp gap
+        for gap in [132, 57, 23, 10, 4, 1] {    ; decreasing gaps; the last pass (gap 1) is a plain insertion sort
+            ubyte i
+            for i in gap to num_elements {
+                ubyte @zp temp = values[i]      ; the element being inserted in this step
+                ubyte @zp j = i
+                ubyte @zp k = j-gap             ; k always trails j by one gap
+                repeat {
+                    ubyte @zp v = values[k]     ; note: read before the j<gap check below, the value is unused if that check breaks
+                    if v <= temp break          ; insertion position found
+                    if j < gap break            ; no element left at j-gap
+                    values[j] = v               ; move the larger element up by one gap
+                    j = k
+                    k -= gap
+                }
+                values[j] = temp
+            }
+        }
+    }
+
+    sub shellsort_uw(uword @requirezp values, ubyte num_elements) {     ; sorts the 'values' array (no-split unsigned words) in place, ascending.
+        num_elements--      ; from here on: index of the last element. NOTE(review): same empty-array caveat as shellsort_ub - confirm
+        ubyte gap
+        for gap in [132, 57, 23, 10, 4, 1] {    ; decreasing gaps; the last pass (gap 1) is a plain insertion sort
+            ubyte i
+            for i in gap to num_elements {
+                uword @zp temp = peekw(values+i*$0002)      ; the element being inserted; words are 2 bytes apart
+                ubyte @zp j = i
+                ubyte @zp k = j-gap             ; k always trails j by one gap
+                while j>=gap {
+                    uword @zp v = peekw(values+k*2)
+                    if v <= temp break          ; insertion position found
+                    pokew(values+j*2, v)        ; move the larger element up by one gap
+                    j = k
+                    k -= gap
+                }
+                pokew(values+j*2, temp)
+            }
+        }
+    }
+
+
+    sub shellsort_by_ub(uword @requirezp ub_keys, uword @requirezp wordvalues, ubyte num_elements) {
+        ; sorts the 'wordvalues' array (no-split array of words) according to the 'ub_keys' array (which also gets sorted, of course).
+        num_elements--      ; from here on: index of the last element. NOTE(review): same empty-array caveat as shellsort_ub - confirm
+        ubyte @zp gap
+        for gap in [132, 57, 23, 10, 4, 1] {    ; decreasing gaps; the last pass (gap 1) is a plain insertion sort
+            ubyte i
+            for i in gap to num_elements {
+                ubyte @zp temp = ub_keys[i]                 ; the key being inserted...
+                uword temp_wv = peekw(wordvalues + i*$0002) ; ...and the word value that travels with it
+                ubyte @zp j = i
+                ubyte @zp k = j-gap             ; k always trails j by one gap
+                repeat {
+                    ubyte @zp v = ub_keys[k]    ; note: read before the j<gap check below, the value is unused if that check breaks
+                    if v <= temp break          ; insertion position found
+                    if j < gap break            ; no element left at j-gap
+                    ub_keys[j] = v              ; move the larger key up by one gap
+                    pokew(wordvalues + j*$0002, peekw(wordvalues + k*$0002))    ; and move its value along with it
+                    j = k
+                    k -= gap
+                }
+                ub_keys[j] = temp
+                pokew(wordvalues + j*$0002, temp_wv)
+            }
+        }
+    }
+
+    sub shellsort_by_uw(uword @requirezp uw_keys, uword @requirezp wordvalues, ubyte num_elements) {
+        ; sorts the 'wordvalues' array according to the 'uw_keys' array (which also gets sorted, of course).
+        ; both arrays should be no-split arrays of words. uw_keys are unsigned.
+        num_elements--      ; from here on: index of the last element. NOTE(review): same empty-array caveat as shellsort_ub - confirm
+        ubyte gap
+        for gap in [132, 57, 23, 10, 4, 1] {    ; decreasing gaps; the last pass (gap 1) is a plain insertion sort
+            ubyte i
+            for i in gap to num_elements {
+                uword @zp temp = peekw(uw_keys+i*$0002)     ; the key being inserted...
+                uword temp_wv = peekw(wordvalues + i*$0002) ; ...and the word value that travels with it
+                ubyte @zp j = i
+                ubyte @zp k = j-gap             ; k always trails j by one gap
+                while j>=gap {
+                    uword @zp v = peekw(uw_keys+k*2)
+                    if v <= temp break          ; insertion position found
+                    pokew(uw_keys+j*2, v)       ; move the larger key up by one gap
+                    pokew(wordvalues + j*$0002, peekw(wordvalues + k*$0002))    ; and move its value along with it
+                    j = k
+                    k -= gap
+                }
+                pokew(uw_keys+j*2, temp)
+                pokew(wordvalues + j*$0002, temp_wv)
+            }
+        }
+    }
+}
@@ -362,15 +362,13 @@ Read the `conv source code <https://github.com/irmen/prog8/tree/master/compiler/
 to see what's in there.


-coroutines (experimental)
-------------------------
+coroutines
+----------
 Provides a system to make cooperative multitasking programs via coroutines.
 A 'coroutine' is a subroutine whose execution you can pause and resume.
 This library handles the voodoo for you to switch between such coroutines transparently,
 so it can seem that your program is executing many subroutines at the same time.

-API is experimental and may change or disappear in a future version.
-
 Read the `coroutines source code <https://github.com/irmen/prog8/tree/master/compiler/res/prog8lib/coroutines.p8>`_
 to see what's in there. And look at the ``multitasking`` example to see how it can be used.
 Here is a minimal example (if the library gets more stable, better docs will be written here)::
@@ -1178,9 +1176,7 @@ to see what's in there. (Note: slight variations for different compiler targets)

 verafx  (cx16 only)
 -------------------
-Available for the Cx16 target.
-Experimental routines that use the new Vera FX logic (hopefully coming in the Vera in new X16 boards,
-the emulators already support it).
+Available for the Cx16 target. Routines that use the Vera FX logic to accelerate certain operations.

 ``available``
    Returns true if Vera FX is available, false if not (that would be an older Vera chip)
@@ -24,7 +24,6 @@ Comments
    Everything on the line after a semicolon ``;`` is a comment and is ignored by the compiler.
    If the whole line is just a comment, this line will be copied into the resulting assembly source code for reference.
    There's also a block-comment: everything surrounded with ``/*`` and ``*/`` is ignored and this can span multiple lines.
-    This block comment is experimental for now: it may change or even be removed again in a future compiler version.
    The recommended way to comment out a bunch of lines remains to just bulk comment them individually with ``;``.

 Directive
@@ -10,7 +10,6 @@ Idea is to make it feature complete in the IR/Virtual target, then merge it to m
 Future Things and Ideas
 ^^^^^^^^^^^^^^^^^^^^^^^

- remove (experimental) tag in docs from 3 libraries
 - when a complete block is removed because unused, suppress all info messages about everything in the block being removed
 - is "checkAssignmentCompatible" redundant (gets called just 1 time!) when we also have "checkValueTypeAndRange" ?
 - enums?
@@ -63,7 +62,6 @@ IR/VM
 Libraries
 ---------
 - Add split-word array sorting routines to sorting module?
- Add double-array sorting routines to sorting module? (that allows you to sort a second array in sync with the array of numbers)
 - See if the raster interrupt handler on the C64 can be tweaked to be a more stable raster irq
 - pet32 target: make syslib more complete (missing kernal routines)?
 - need help with: PET disk routines (OPEN, SETLFS etc are not exposed as kernal calls)
@@ -1,39 +1,69 @@
 %import textio
+%import sorting
 %import strings
+%import emudbg
 %zeropage basicsafe
-%encoding petscii

 main {
    sub start() {
-        str textiso = iso:"first\x0asecond\x0athird\x0a"
-        str textpetscii = petscii:"first\nsecond\nthird\n"
+        ubyte[19] keys
+        str[19] @nosplit values = [ "the", "narrator", "begins", "by", "describing", "the", "hole", "in", "the", "ground", "beneath", "a", "hill", "in", "which", "a", "particular", "hobbit", "lives"]

-        txt.print("    iso: ")
-        dump(textiso)
+        uword[19] @nosplit wkeys
+        str[19] @nosplit wvalues = [ "the", "narrator", "begins", "by", "describing", "the", "hole", "in", "the", "ground", "beneath", "a", "hill", "in", "which", "a", "particular", "hobbit", "lives"]
+
+        for cx16.r0L in 0 to len(keys)-1 {
+            keys[cx16.r0L] = strings.length(values[cx16.r0L])
+            wkeys[cx16.r0L] = strings.length(wvalues[cx16.r0L])
+        }
+
+        perf_reset()
+        repeat 100 {
+            sorting.gnomesort_by_ub(keys, values, len(keys))
+        }
+        perf_print()
+        dump()
        txt.nl()
-        cx16.r0L, void = strings.find(textiso, '\n')
-        txt.print_ub(cx16.r0L)
-        txt.spc()
-        cx16.r0L, void = strings.find_eol(textiso)
-        txt.print_ub(cx16.r0L)
        txt.nl()

-        txt.print("petscii: ")
-        dump(textpetscii)
        txt.nl()
-        cx16.r0L, void = strings.find(textpetscii, '\n')
-        txt.print_ub(cx16.r0L)
-        txt.spc()
-        cx16.r0L, void = strings.find_eol(textpetscii)
-        txt.print_ub(cx16.r0L)
-        txt.nl()
-    }
+        perf_reset()
+        repeat 100 {
+            sorting.gnomesort_by_uw(wkeys, wvalues, len(wkeys))
+        }
+        perf_print()
+        dumpw()

-    sub dump(uword ptr) {
-        while @(ptr)!=0 {
-            txt.print_ubhex(@(ptr), false)
-            txt.spc()
-            ptr++
+        sub dump() {
+            for cx16.r0L in 0 to len(keys)-1 {
+                txt.print_ub(keys[cx16.r0L])
+                txt.spc()
+                txt.spc()
+                txt.print(values[cx16.r0L])
+                txt.nl()
+            }
+        }
+
+        sub dumpw() {
+            for cx16.r0L in 0 to len(wkeys)-1 {
+                txt.print_uw(wkeys[cx16.r0L])
+                txt.spc()
+                txt.spc()
+                txt.print(wvalues[cx16.r0L])
+                txt.nl()
+            }
        }
    }
+
+
+    sub perf_reset() {
+        emudbg.reset_cpu_cycles()
+    }
+
+    sub perf_print() {
+        cx16.r4, cx16.r5 = emudbg.cpu_cycles()
+        txt.print_uwhex(cx16.r5, true)
+        txt.print_uwhex(cx16.r4, false)
+        txt.nl()
+    }
 }