From 699a2bb7ab0c67047ec7a206f170a44bdee4e46b Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Tue, 10 Sep 2024 20:35:43 +0200 Subject: [PATCH] improved codegen for for loops downto 0 --- .../prog8/codegen/cpu6502/ForLoopsAsmGen.kt | 91 ++++++---- docs/source/programming.rst | 6 + docs/source/todo.rst | 6 + examples/test.p8 | 165 ++++-------------- 4 files changed, 104 insertions(+), 164 deletions(-) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt index a1b566a6f..2b7f37e3b 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/ForLoopsAsmGen.kt @@ -60,13 +60,18 @@ internal class ForLoopsAsmGen( // pre-check for end already reached if(iterableDt==DataType.ARRAY_B) { asmgen.out(" sta $modifiedLabel+1") - if(stepsize<0) - asmgen.out(""" - clc - sbc $varname - bvc + - eor #${'$'}80 -+ bpl $endLabel""") + if(stepsize<0) { + if(range.to.asConstInteger()==0 && asmgen.options.optimize) { + throw AssemblyError("downto 0 (signed byte) should have been replaced by an until-loop") + } else { + asmgen.out(""" + clc + sbc $varname + bvc + + eor #${'$'}80 ++ bpl $endLabel""") + } + } else asmgen.out(""" sec @@ -75,12 +80,17 @@ internal class ForLoopsAsmGen( eor #${'$'}80 + bmi $endLabel""") } else { - if(stepsize<0) - asmgen.out(""" - cmp $varname - beq + - bcs $endLabel + if(stepsize<0) { + if(range.to.asConstInteger()==0 && asmgen.options.optimize) { + throw AssemblyError("downto 0 (unsigned byte) should have been replaced by an until-loop") + } else { + asmgen.out(""" + cmp $varname + beq + + bcs $endLabel +""") + } + } else asmgen.out(" cmp $varname | bcc $endLabel") asmgen.out(" sta $modifiedLabel+1") @@ -160,6 +170,8 @@ $modifiedLabel cmp #0 ; modified // words, step 1 or -1 stepsize == 1 || stepsize == -1 -> { + if(range.to.asConstInteger()==0 && asmgen.options.optimize) + throw AssemblyError("downto 0 (words) should have 
been replaced by an until-loop") val varname = asmgen.asmVariableName(stmt.variable) assignLoopvarWord(stmt, range) asmgen.assignExpressionToRegister(range.to, RegisterOrPair.AY) @@ -486,7 +498,7 @@ $loopLabel sty $indexVar if(iterableDt==DataType.ARRAY_B || iterableDt==DataType.ARRAY_UB) { if(range.last==range.first) return translateForSimpleByteRangeAsc(stmt, range) if(range.step==1 && range.last>range.first) return translateForSimpleByteRangeAsc(stmt, range) - if(range.step==-1 && range.last { - asmgen.out(""" - lda $varname - beq $endLabel - dec $varname""") - asmgen.jmp(loopLabel) - asmgen.out(endLabel) + if(!unsigned || range.first<=127) { + asmgen.out(""" + dec $varname + bpl $loopLabel""") + } else { + asmgen.out(""" + dec $varname + lda $varname + cmp #255 + bne $loopLabel""") + } } 1 -> { asmgen.out(""" dec $varname - bne $loopLabel -$endLabel""") + bne $loopLabel""") } else -> { asmgen.out(""" dec $varname lda $varname cmp #${range.last-1} - bne $loopLabel -$endLabel""") + bne $loopLabel""") } } - asmgen.loopEndLabels.removeLast() } private fun translateForSimpleWordRangeAsc(stmt: PtForLoop, range: IntProgression) { @@ -708,13 +720,22 @@ $loopLabel""") sty $varname+1 $loopLabel""") asmgen.translate(stmt.statements) + if(range.last==0) { + asmgen.out(""" + lda $varname + bne ++ + lda $varname+1 + beq $endLabel""") + } else { + asmgen.out(""" + lda $varname + cmp #<${range.last} + bne + + lda $varname+1 + cmp #>${range.last} + beq $endLabel""") + } asmgen.out(""" - lda $varname - cmp #<${range.last} - bne + - lda $varname+1 - cmp #>${range.last} - beq $endLabel + lda $varname bne + dec $varname+1 diff --git a/docs/source/programming.rst b/docs/source/programming.rst index b5d521b1c..d6802c4c4 100644 --- a/docs/source/programming.rst +++ b/docs/source/programming.rst @@ -518,6 +518,12 @@ Loops ----- The *for*-loop is used to let a variable iterate over a range of values. Iteration is done in steps of 1, but you can change this. + +.. 
sidebar:: + Optimization + + Usually a loop in descending order downto 0 or 1 produces more efficient assembly code than the same loop in ascending order. + The loop variable must be declared separately as byte or word earlier, so that you can reuse it for multiple occasions. Iterating with a floating point variable is not supported. If you want to loop over a floating-point array, use a loop with an integer index variable instead. If the from value is already outside of the loop range, the whole for loop is skipped. diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 181a4fc9c..45ebb49c4 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,6 +1,12 @@ TODO ==== +Improve codegen for for loops downto 0 or 1 when the start value is not const. + + +IR: Improve codegen for for loops downto 0. (BPL if <=127 etc like 6502 codegen?) + + Improve register load order in subroutine call args assignments: in certain situations, the "wrong" order of evaluation of function call arguments is done which results in overwriting registers that already got their value, which requires a lot of stack juggling (especially on plain 6502 cpu!) 
diff --git a/examples/test.p8 b/examples/test.p8 index 1bb8af7d9..2e4fc5c08 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -5,139 +5,46 @@ main { sub start() { - ubyte @shared x - uword @shared w - bool flag1, flag2 - - sys.clear_carry() - flag1 = onlyCarry() - if flag1 - txt.print("1: ok\n") - else - txt.print("1: fail\n") - - x=1 - flag1 = onlyZero() - if flag1 - txt.print("2: ok\n") - else - txt.print("2: fail\n") - - sys.clear_carry() - flag1, x = carryAndByte() - if flag1 and x==42 - txt.print("3: ok\n") - else - txt.print("3: fail\n") - - sys.clear_carry() - flag1, w = carryAndWord() - if flag1 and w==4242 - txt.print("4: ok\n") - else - txt.print("4: fail\n") - - sys.clear_carry() - flag1, x, w = carryAndValues() - if flag1 and x==99 and w==9999 - txt.print("5: ok\n") - else - txt.print("5: fail\n") - - x = 1 - sys.clear_carry() - flag1, flag2 = onlyCarryAndZero() - if flag1 and flag2 - txt.print("6: ok\n") - else - txt.print("6: fail\n") - - x = 1 - sys.clear_carry() - flag1, flag2, x = carryAndZeroAndByte() - if flag1 and flag2 and x==33 - txt.print("7: ok\n") - else - txt.print("7: fail\n") - - x = 1 - sys.clear_carry() - flag1, flag2, x, w = carryAndNegativeAndByteAndWord() - if flag1 and flag2 and x==55 and w==51400 - txt.print("8: ok\n") - else - txt.print("8: fail\n") - } + ubyte x + uword w + uword @shared wstart=50000 + ubyte @shared bstart=127 + uword y + uword duration + byte b - asmsub carryAndNegativeAndByteAndWord() -> bool @Pc, bool @Pn, ubyte @X, uword @AY { - %asm {{ - ldx #55 - lda #200 - ldy #200 - sec - rts - }} - } + cbm.SETTIM(0,0,0) + repeat 5000 { + y=0 +; for x in bstart downto 0 { +; y++ +; } + x = bstart + do { + y++ + x-- + } until x==255 + } + txt.print_uw(cbm.RDTIM16()) + if y!=128 + txt.print("error 1\n") - asmsub carryAndZeroAndByte() -> bool @Pc, bool @Pz, ubyte @Y { - %asm {{ - ldy #33 - lda #0 - sec - rts - }} - } +/* + for w in 65535 downto 0 { + y++ + } + if y!=0 + txt.print("error 10\n") + y=0 + for w in 0 to 
65535 { + y++ + } + if y!=0 + txt.print("error 11\n") +*/ - asmsub onlyCarryAndZero() -> bool @Pc, bool @Pz { - %asm {{ - lda #0 - sec - rts - }} - } - - - asmsub carryAndValues() -> bool @Pc, ubyte @X, uword @AY { - %asm {{ - ldx #99 - lda #<9999 - ldy #>9999 - sec - rts - }} - } - - - asmsub carryAndWord() -> bool @Pc, uword @AY { - %asm {{ - lda #<4242 - ldy #>4242 - sec - rts - }} - } - - asmsub carryAndByte() -> bool @Pc, ubyte @A { - %asm {{ - lda #42 - sec - rts - }} - } - - asmsub onlyCarry() -> bool @Pc { - %asm {{ - sec - rts - }} - } - - asmsub onlyZero() -> bool @Pz { - %asm {{ - lda #0 - rts - }} + txt.print("\nall done\n") } }