fix for-loop codegen over non-split word arrays of 64 or more elements

This commit is contained in:
Irmen de Jong
2025-05-13 22:43:20 +02:00
parent e31ef6f06f
commit 67231af623
4 changed files with 22 additions and 79 deletions

View File

@@ -622,7 +622,6 @@ $loopLabel sty $indexVar
asmgen.out(endLabel)
}
iterableDt.isWordArray -> {
val length = numElements * 2
val indexVar = if(asmgen.options.romable)
asmgen.createTempVarReused(BaseDataType.UBYTE, false, stmt)
else
@@ -636,16 +635,16 @@ $loopLabel sty $indexVar
lda $iterableName+1,y
sta $loopvarName+1""")
asmgen.translate(stmt.statements)
if(length<=127) {
if(numElements<=127) {
asmgen.out("""
ldy $indexVar
iny
iny
cpy #$length
cpy #${numElements*2}
beq $endLabel
bne $loopLabel""")
} else {
// length is 128 words, 256 bytes
// array size is 128 words, 256 bytes
asmgen.out("""
ldy $indexVar
iny
@@ -654,7 +653,7 @@ $loopLabel sty $indexVar
beq $endLabel""")
}
if(!asmgen.options.romable) {
if(length>=16) {
if(numElements>=16) {
// allocate index var on ZP if possible, otherwise inline
val result = zeropage.allocate(indexVar, DataType.UBYTE, null, stmt.position, asmgen.errors)
result.fold(

View File

@@ -65,7 +65,10 @@ _done
*/
sub gnomesort_uw(uword values, ubyte num_elements) {
; TODO optimize this more, rewrite in asm?
; When written in asm this is 10-20% faster, but unreadable. Not worth it.
; Also, sorting just an array of word numbers is seldom useful on its own: most often you
; need to sort other things associated with it as well, and that is not done here,
; so a custom user-coded sorting routine is required anyway.
ubyte @zp pos = 1
uword @requirezp ptr = values+2
while pos != num_elements {

View File

@@ -60,6 +60,7 @@ IR/VM
Libraries
---------
- Add split-word array sorting routines to sorting module?
- Add double-array sorting routines to sorting module? (that allows you to sort a second array in sync with the array of numbers)
- cx16: _irq_dispatcher now only dispatches a single irq source, better to ROL/BCC to handle *all* possible (multiple) sources.
- See if the raster interrupt handler on the C64 can be tweaked to be a more stable raster irq
- pet32 target: make syslib more complete (missing kernal routines)?
@@ -70,7 +71,6 @@ Libraries
Optimizations
-------------
- Sorting module gnomesort_uw could be optimized more by fully rewriting it in asm? Shellsort seems consistently faster even if most of the words are already sorted.
- Compare output of some Oscar64 samples to what prog8 does for the equivalent code (see https://github.com/drmortalwombat/OscarTutorials/tree/main and https://github.com/drmortalwombat/oscar64/tree/main/samples)
- Optimize the IfExpression code generation to be more like regular if-else code. (both 6502 and IR) search for "TODO don't store condition as expression"
- VariableAllocator: can we think of a smarter strategy for allocating variables into zeropage, rather than first-come-first-served?

View File

@@ -1,82 +1,23 @@
%import textio
%zeropage basicsafe
%option romable
main {
uword[100] @nosplit array1 = 1 to 100
uword[100] @split array2 = 100 downto 1
sub start() {
for cx16.r0L in "irmen" {
for cx16.r2 in array1 {
txt.print_uw(cx16.r2)
txt.spc()
}
txt.nl()
txt.chrout(cx16.r0L)
for cx16.r1L in "green" {
txt.chrout(cx16.r1L)
}
}
for cx16.r0L in "irmen" {
txt.nl()
txt.chrout(cx16.r0L)
for cx16.r1L in "blue" {
txt.chrout(cx16.r1L)
for cx16.r2 in array2 {
txt.print_uw(cx16.r2)
txt.spc()
}
}
for cx16.r0L in "irmen" {
txt.nl()
txt.chrout(cx16.r0L)
for cx16.r1L in "red" {
txt.chrout(cx16.r1L)
}
}
for cx16.r0L in [11,22,33,44]
cx16.r1L++
for cx16.r0L in [11,22,33,44]
cx16.r1L++
for cx16.r0L in [11,22,33,44]
cx16.r1L++
bool z
for z in [true, true, false, false]
cx16.r1L++
for z in [true, true, false, false]
cx16.r1L++
for z in [true, true, false, false]
cx16.r1L++
for cx16.r0 in [1111,2222,3333]
cx16.r1L++
for cx16.r0 in [1111,2222,3333]
cx16.r1L++
for cx16.r0 in [1111,2222,3333]
cx16.r1L++
; repeat 2 {
; repeat 2 {
; repeat 260 {
; repeat 260 {
; cx16.r0++
; }
; }
; }
; }
;
; txt.print_uw(cx16.r0)
; txt.nl()
; cx16.r0=0
;
; repeat 2 {
; repeat 2 {
; repeat 260 {
; repeat 260 {
; cx16.r0++
; }
; }
; }
; }
;
; txt.print_uw(cx16.r0)
; txt.nl()
txt.nl()
}
}