improved codegen for for loops downto 0

This commit is contained in:
Irmen de Jong 2024-09-10 20:35:43 +02:00
parent 4a2dcd20d1
commit 699a2bb7ab
4 changed files with 104 additions and 164 deletions

View File

@ -60,13 +60,18 @@ internal class ForLoopsAsmGen(
// pre-check for end already reached
if(iterableDt==DataType.ARRAY_B) {
asmgen.out(" sta $modifiedLabel+1")
if(stepsize<0)
asmgen.out("""
clc
sbc $varname
bvc +
eor #${'$'}80
+ bpl $endLabel""")
if(stepsize<0) {
if(range.to.asConstInteger()==0 && asmgen.options.optimize) {
throw AssemblyError("downto 0 (signed byte) should have been replaced by an until-loop")
} else {
asmgen.out("""
clc
sbc $varname
bvc +
eor #${'$'}80
+ bpl $endLabel""")
}
}
else
asmgen.out("""
sec
@ -75,12 +80,17 @@ internal class ForLoopsAsmGen(
eor #${'$'}80
+ bmi $endLabel""")
} else {
if(stepsize<0)
asmgen.out("""
cmp $varname
beq +
bcs $endLabel
if(stepsize<0) {
if(range.to.asConstInteger()==0 && asmgen.options.optimize) {
throw AssemblyError("downto 0 (unsigned byte) should have been replaced by an until-loop")
} else {
asmgen.out("""
cmp $varname
beq +
bcs $endLabel
+""")
}
}
else
asmgen.out(" cmp $varname | bcc $endLabel")
asmgen.out(" sta $modifiedLabel+1")
@ -160,6 +170,8 @@ $modifiedLabel cmp #0 ; modified
// words, step 1 or -1
stepsize == 1 || stepsize == -1 -> {
if(range.to.asConstInteger()==0 && asmgen.options.optimize)
throw AssemblyError("downto 0 (words) should have been replaced by an until-loop")
val varname = asmgen.asmVariableName(stmt.variable)
assignLoopvarWord(stmt, range)
asmgen.assignExpressionToRegister(range.to, RegisterOrPair.AY)
@ -486,7 +498,7 @@ $loopLabel sty $indexVar
if(iterableDt==DataType.ARRAY_B || iterableDt==DataType.ARRAY_UB) {
if(range.last==range.first) return translateForSimpleByteRangeAsc(stmt, range)
if(range.step==1 && range.last>range.first) return translateForSimpleByteRangeAsc(stmt, range)
if(range.step==-1 && range.last<range.first) return translateForSimpleByteRangeDesc(stmt, range)
if(range.step==-1 && range.last<range.first) return translateForSimpleByteRangeDesc(stmt, range, iterableDt==DataType.ARRAY_UB)
}
else if(iterableDt==DataType.ARRAY_W || iterableDt==DataType.ARRAY_UW) {
if(range.last==range.first) return translateForSimpleWordRangeAsc(stmt, range)
@ -632,41 +644,41 @@ $endLabel""")
asmgen.loopEndLabels.removeLast()
}
private fun translateForSimpleByteRangeDesc(stmt: PtForLoop, range: IntProgression) {
private fun translateForSimpleByteRangeDesc(stmt: PtForLoop, range: IntProgression, unsigned: Boolean) {
val loopLabel = asmgen.makeLabel("for_loop")
val endLabel = asmgen.makeLabel("for_end")
asmgen.loopEndLabels.add(endLabel)
val varname = asmgen.asmVariableName(stmt.variable)
asmgen.out("""
lda #${range.first}
sta $varname
lda #${range.first}
sta $varname
$loopLabel""")
asmgen.translate(stmt.statements)
when (range.last) {
0 -> {
asmgen.out("""
lda $varname
beq $endLabel
dec $varname""")
asmgen.jmp(loopLabel)
asmgen.out(endLabel)
if(!unsigned || range.first<=127) {
asmgen.out("""
dec $varname
bpl $loopLabel""")
} else {
asmgen.out("""
dec $varname
lda $varname
cmp #255
bne $loopLabel""")
}
}
1 -> {
asmgen.out("""
dec $varname
bne $loopLabel
$endLabel""")
bne $loopLabel""")
}
else -> {
asmgen.out("""
dec $varname
lda $varname
cmp #${range.last-1}
bne $loopLabel
$endLabel""")
bne $loopLabel""")
}
}
asmgen.loopEndLabels.removeLast()
}
private fun translateForSimpleWordRangeAsc(stmt: PtForLoop, range: IntProgression) {
@ -708,13 +720,22 @@ $loopLabel""")
sty $varname+1
$loopLabel""")
asmgen.translate(stmt.statements)
if(range.last==0) {
asmgen.out("""
lda $varname
bne ++
lda $varname+1
beq $endLabel""")
} else {
asmgen.out("""
lda $varname
cmp #<${range.last}
bne +
lda $varname+1
cmp #>${range.last}
beq $endLabel""")
}
asmgen.out("""
lda $varname
cmp #<${range.last}
bne +
lda $varname+1
cmp #>${range.last}
beq $endLabel
+ lda $varname
bne +
dec $varname+1

View File

@ -518,6 +518,12 @@ Loops
-----
The *for*-loop is used to let a variable iterate over a range of values. Iteration is done in steps of 1, but you can change this.
.. sidebar::
Optimization
Usually a loop in descending order downto 0 or 1 produces more efficient assembly code than the same loop in ascending order.
The loop variable must be declared separately beforehand as a byte or word, so that you can reuse it in multiple loops.
Iterating with a floating point variable is not supported. If you want to loop over a floating-point array, use a loop with an integer index variable instead.
If the from value is already outside of the loop range, the whole for loop is skipped.

View File

@ -1,6 +1,12 @@
TODO
====
Improve codegen for for-loops downto 0 or 1 when the start value is not a constant.
IR: Improve codegen for for loops downto 0. (BPL if <=127 etc like 6502 codegen?)
Improve register load order in subroutine call args assignments:
in certain situations, function call arguments are evaluated in the "wrong" order, which results
in overwriting registers that already got their value, and fixing that requires a lot of stack juggling (especially on plain 6502 cpu!)

View File

@ -5,139 +5,46 @@
main {
sub start() {
ubyte @shared x
uword @shared w
bool flag1, flag2
sys.clear_carry()
flag1 = onlyCarry()
if flag1
txt.print("1: ok\n")
else
txt.print("1: fail\n")
x=1
flag1 = onlyZero()
if flag1
txt.print("2: ok\n")
else
txt.print("2: fail\n")
sys.clear_carry()
flag1, x = carryAndByte()
if flag1 and x==42
txt.print("3: ok\n")
else
txt.print("3: fail\n")
sys.clear_carry()
flag1, w = carryAndWord()
if flag1 and w==4242
txt.print("4: ok\n")
else
txt.print("4: fail\n")
sys.clear_carry()
flag1, x, w = carryAndValues()
if flag1 and x==99 and w==9999
txt.print("5: ok\n")
else
txt.print("5: fail\n")
x = 1
sys.clear_carry()
flag1, flag2 = onlyCarryAndZero()
if flag1 and flag2
txt.print("6: ok\n")
else
txt.print("6: fail\n")
x = 1
sys.clear_carry()
flag1, flag2, x = carryAndZeroAndByte()
if flag1 and flag2 and x==33
txt.print("7: ok\n")
else
txt.print("7: fail\n")
x = 1
sys.clear_carry()
flag1, flag2, x, w = carryAndNegativeAndByteAndWord()
if flag1 and flag2 and x==55 and w==51400
txt.print("8: ok\n")
else
txt.print("8: fail\n")
}
ubyte x
uword w
uword @shared wstart=50000
ubyte @shared bstart=127
uword y
uword duration
byte b
asmsub carryAndNegativeAndByteAndWord() -> bool @Pc, bool @Pn, ubyte @X, uword @AY {
%asm {{
ldx #55
lda #200
ldy #200
sec
rts
}}
}
cbm.SETTIM(0,0,0)
repeat 5000 {
y=0
; for x in bstart downto 0 {
; y++
; }
x = bstart
do {
y++
x--
} until x==255
}
txt.print_uw(cbm.RDTIM16())
if y!=128
txt.print("error 1\n")
asmsub carryAndZeroAndByte() -> bool @Pc, bool @Pz, ubyte @Y {
%asm {{
ldy #33
lda #0
sec
rts
}}
}
/*
for w in 65535 downto 0 {
y++
}
if y!=0
txt.print("error 10\n")
y=0
for w in 0 to 65535 {
y++
}
if y!=0
txt.print("error 11\n")
*/
asmsub onlyCarryAndZero() -> bool @Pc, bool @Pz {
%asm {{
lda #0
sec
rts
}}
}
asmsub carryAndValues() -> bool @Pc, ubyte @X, uword @AY {
%asm {{
ldx #99
lda #<9999
ldy #>9999
sec
rts
}}
}
asmsub carryAndWord() -> bool @Pc, uword @AY {
%asm {{
lda #<4242
ldy #>4242
sec
rts
}}
}
asmsub carryAndByte() -> bool @Pc, ubyte @A {
%asm {{
lda #42
sec
rts
}}
}
asmsub onlyCarry() -> bool @Pc {
%asm {{
sec
rts
}}
}
asmsub onlyZero() -> bool @Pz {
%asm {{
lda #0
rts
}}
txt.print("\nall done\n")
}
}