fix forloop codegen over non-split word arrays of length >= 64 elements

2025-11-03 19:16:13 +00:00 · 2025-05-13 22:43:20 +02:00
parent e31ef6f06f
commit 67231af623
4 changed files with 22 additions and 79 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt
@@ -622,7 +622,6 @@ $loopLabel          sty  $indexVar
                asmgen.out(endLabel)
            }
            iterableDt.isWordArray -> {
-                val length = numElements * 2
                val indexVar = if(asmgen.options.romable)
                    asmgen.createTempVarReused(BaseDataType.UBYTE, false, stmt)
                else
@@ -636,16 +635,16 @@ $loopLabel          sty  $indexVar
                    lda  $iterableName+1,y
                    sta  $loopvarName+1""")
                asmgen.translate(stmt.statements)
-                if(length<=127) {
+                if(numElements<=127) {
                    asmgen.out("""
                        ldy  $indexVar
                        iny
                        iny
-                        cpy  #$length
+                        cpy  #${numElements*2}
                        beq  $endLabel
                        bne  $loopLabel""")
                } else {
-                    // length is 128 words, 256 bytes
+                    // array size is 128 words, 256 bytes
                    asmgen.out("""
                        ldy  $indexVar
                        iny
@@ -654,7 +653,7 @@ $loopLabel          sty  $indexVar
                        beq  $endLabel""")
                }
                if(!asmgen.options.romable) {
-                    if(length>=16) {
+                    if(numElements>=16) {
                        // allocate index var on ZP if possible, otherwise inline
                        val result = zeropage.allocate(indexVar, DataType.UBYTE, null, stmt.position, asmgen.errors)
                        result.fold(
--- a/compiler/res/prog8lib/sorting.p8
+++ b/compiler/res/prog8lib/sorting.p8
@@ -65,7 +65,10 @@ _done
    */

    sub gnomesort_uw(uword values, ubyte num_elements) {
-        ; TODO optimize this more, rewrite in asm?
+        ; When written in asm this is 10-20% faster, but unreadable. Not worth it.
+        ; Also, sorting just an array of word numbers is seldom useful on its own: most often you
+        ; need to sort other things associated with it as well, and that is not done here,
+        ; so it requires a custom user-coded sorting routine anyway.
        ubyte @zp pos = 1
        uword @requirezp ptr = values+2
        while pos != num_elements {
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -60,6 +60,7 @@ IR/VM
 Libraries
 ---------
 - Add split-word array sorting routines to sorting module?
+- Add double-array sorting routines to sorting module? (that allows you to sort a second array in sync with the array of numbers)
 - cx16: _irq_dispatcher  now only dispatches a single irq source, better to ROL/BCC to handle *all* possible (multiple) sources.
 - See if the raster interrupt handler on the C64 can be tweaked to be a more stable raster irq
 - pet32 target: make syslib more complete (missing kernal routines)?
@@ -70,7 +71,6 @@ Libraries
 Optimizations
 -------------

- Sorting module gnomesort_uw could be optimized more by fully rewriting it in asm? Shellshort seems consistently faster even if most of the words are already sorted.
 - Compare output of some Oscar64 samples to what prog8 does for the equivalent code (see https://github.com/drmortalwombat/OscarTutorials/tree/main and https://github.com/drmortalwombat/oscar64/tree/main/samples)
 - Optimize the IfExpression code generation to be more like regular if-else code.  (both 6502 and IR) search for "TODO don't store condition as expression"
 - VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served?
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,82 +1,23 @@
 %import textio
 %zeropage basicsafe
-%option romable


 main {
+    uword[100] @nosplit array1 = 1 to 100
+    uword[100] @split array2 = 100 downto 1
+
    sub start() {
-        for cx16.r0L in "irmen" {
+        for cx16.r2 in array1 {
+            txt.print_uw(cx16.r2)
+            txt.spc()
+        }
        txt.nl()
-            txt.chrout(cx16.r0L)
-            for cx16.r1L in "green" {
-                txt.chrout(cx16.r1L)
-            }
-        }
-
-        for cx16.r0L in "irmen" {
        txt.nl()
-            txt.chrout(cx16.r0L)
-            for cx16.r1L in "blue" {
-                txt.chrout(cx16.r1L)
+        for cx16.r2 in array2 {
+            txt.print_uw(cx16.r2)
+            txt.spc()
        }
-        }
-
-        for cx16.r0L in "irmen" {
        txt.nl()
-            txt.chrout(cx16.r0L)
-            for cx16.r1L in "red" {
-                txt.chrout(cx16.r1L)
-            }
-        }
-
-        for cx16.r0L in [11,22,33,44]
-            cx16.r1L++
-        for cx16.r0L in [11,22,33,44]
-            cx16.r1L++
-        for cx16.r0L in [11,22,33,44]
-            cx16.r1L++
-
-        bool z
-
-        for z in [true, true, false, false]
-            cx16.r1L++
-        for z in [true, true, false, false]
-            cx16.r1L++
-        for z in [true, true, false, false]
-            cx16.r1L++
-
-        for cx16.r0 in [1111,2222,3333]
-            cx16.r1L++
-        for cx16.r0 in [1111,2222,3333]
-            cx16.r1L++
-        for cx16.r0 in [1111,2222,3333]
-            cx16.r1L++
-
-;        repeat 2 {
-;            repeat 2 {
-;                repeat 260 {
-;                    repeat 260 {
-;                        cx16.r0++
-;                    }
-;                }
-;            }
-;        }
-;
-;        txt.print_uw(cx16.r0)
-;        txt.nl()
-;        cx16.r0=0
-;
-;        repeat 2 {
-;            repeat 2 {
-;                repeat 260 {
-;                    repeat 260 {
-;                        cx16.r0++
-;                    }
-;                }
-;            }
-;        }
-;
-;        txt.print_uw(cx16.r0)
-;        txt.nl()
+        txt.nl()
    }
 }