optimize trivial 65c02 stack instructions

2025-08-07 21:25:18 +00:00 · 2024-06-29 15:41:39 +02:00
parent 1c97c22eff
commit 7a9dd1ac9b
4 changed files with 84 additions and 132 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
@@ -384,6 +384,7 @@ private fun optimizeStoreLoadSame(
    for (lines in linesByFour) {
        val first = lines[1].value.trimStart()
        val second = lines[2].value.trimStart()
        val third = lines[3].value.trimStart()
        // sta X + lda X,  sty X + ldy X,   stx X + ldx X  -> the second instruction can OFTEN be eliminated
        if ((first.startsWith("sta ") && second.startsWith("lda ")) ||
@@ -393,7 +394,6 @@ private fun optimizeStoreLoadSame(
                (first.startsWith("ldy ") && second.startsWith("ldy ")) ||
                (first.startsWith("ldx ") && second.startsWith("ldx "))
        ) {
            val third = lines[3].value.trimStart()
            val attemptRemove =
                if(third.isBranch()) {
                    // a branch instruction follows, we can only remove the load instruction if
@@ -446,6 +446,23 @@ private fun optimizeStoreLoadSame(
            if (firstLoc == secondLoc)
                mods.add(Modification(lines[2].index, true, null))
        }
        // phy + ldy + pla -> tya + ldy
        // phx + ldx + pla -> txa + ldx
        // pha + lda + pla -> (nothing: A ends up unchanged, so all three instructions are removed)
        if(first=="phy" && second.startsWith("ldy ") && third=="pla") {
            mods.add(Modification(lines[3].index, true, null))
            mods.add(Modification(lines[1].index, false, "  tya"))
        }
        else if(first=="phx" && second.startsWith("ldx ") && third=="pla") {
            mods.add(Modification(lines[3].index, true, null))
            mods.add(Modification(lines[1].index, false, "  txa"))
        }
        else if(first=="pha" && second.startsWith("lda ") && third=="pla") {
            mods.add(Modification(lines[1].index, true, null))
            mods.add(Modification(lines[2].index, true, null))
            mods.add(Modification(lines[3].index, true, null))
        }
    }
    return mods
 }
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
@@ -3182,11 +3182,20 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                    }
                    else {
                        if (regs !in Cx16VirtualRegisters) {
-                            when (regs) {
+                            if (asmgen.isTargetCpu(CpuType.CPU65c02)) {
-                                RegisterOrPair.AX -> asmgen.out("  pha |  txa |  pha")
+                                when (regs) {
-                                RegisterOrPair.AY -> asmgen.out("  pha |  tya |  pha")
+                                    RegisterOrPair.AX -> asmgen.out("  pha |  phx")
-                                RegisterOrPair.XY -> asmgen.out("  txa |  pha |  tya |  pha")
+                                    RegisterOrPair.AY -> asmgen.out("  pha |  phy")
-                                else -> throw AssemblyError("expected reg pair")
+                                    RegisterOrPair.XY -> asmgen.out("  phx |  phy")
                                    else -> throw AssemblyError("expected reg pair")
                                }
                            } else {
                                when (regs) {
                                    RegisterOrPair.AX -> asmgen.out("  pha |  txa |  pha")
                                    RegisterOrPair.AY -> asmgen.out("  pha |  tya |  pha")
                                    RegisterOrPair.XY -> asmgen.out("  txa |  pha |  tya |  pha")
                                    else -> throw AssemblyError("expected reg pair")
                                }
                            }
                            asmgen.loadScaledArrayIndexIntoRegister(target.array, CpuRegister.Y)
                            asmgen.out("""
@@ -3225,11 +3234,20 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                    }
                    else {
                        if (regs !in Cx16VirtualRegisters) {
-                            when (regs) {
+                            if (asmgen.isTargetCpu(CpuType.CPU65c02)) {
-                                RegisterOrPair.AX -> asmgen.out("  pha |  txa |  pha")
+                                when (regs) {
-                                RegisterOrPair.AY -> asmgen.out("  pha |  tya |  pha")
+                                    RegisterOrPair.AX -> asmgen.out("  pha |  phx")
-                                RegisterOrPair.XY -> asmgen.out("  txa |  pha |  tya |  pha")
+                                    RegisterOrPair.AY -> asmgen.out("  pha |  phy")
-                                else -> throw AssemblyError("expected reg pair")
+                                    RegisterOrPair.XY -> asmgen.out("  phx |  phy")
                                    else -> throw AssemblyError("expected reg pair")
                                }
                            } else {
                                when (regs) {
                                    RegisterOrPair.AX -> asmgen.out("  pha |  txa |  pha")
                                    RegisterOrPair.AY -> asmgen.out("  pha |  tya |  pha")
                                    RegisterOrPair.XY -> asmgen.out("  txa |  pha |  tya |  pha")
                                    else -> throw AssemblyError("expected reg pair")
                                }
                            }
                            asmgen.loadScaledArrayIndexIntoRegister(target.array, CpuRegister.Y)
                            asmgen.out("""
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -6,8 +6,6 @@ causes compiler error for virtual: just calling txt.cls() gives compile error un
 https://github.com/irmen/prog8/issues/136 (string.find register order issue)
 optimization: for 65c02 sometimes tya pha is generated, could be just phy (mind if A gets used afterwards though!) (same for pla tay etcetera?)
 if-optimization:
        if row == NUMQUEENS {
            print_solution()
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,133 +1,52 @@
 %import math
 %import textio
 %zeropage basicsafe
 %option no_sysinit
 main {
    sub start() {
-        cx16.r0sL = 127
+        signed()
-        cx16.r0sL = bytefunc(cx16.r0sL+1)
+        unsigned()
        cx16.r0sL = 0
        cx16.r0sL = bytefunc(cx16.r0sL-1)
        cx16.r0sL = 55
        cx16.r0sL = bytefunc(cx16.r0sL+20)
        cx16.r0sL = 55
        cx16.r0sL = bytefunc(cx16.r0sL-20)
        cx16.r0s = $99ff as word
        cx16.r0s = wordfunc(cx16.r0s+1)
        cx16.r0s = $9900 as word
        cx16.r0s = wordfunc(cx16.r0s-1)
        cx16.r0s = -12345
        cx16.r0s = wordfunc(cx16.r0s+100)
        cx16.r0s = -12345
        cx16.r0s = wordfunc(cx16.r0s-100)
    }
    sub signed() {
        byte @shared bvalue = -100
        word @shared wvalue = -20000
-    sub bytefunc(byte x) -> byte {
+        bvalue /= 2     ; TODO should be a simple bit shift?
-        txt.print_ubhex(x as ubyte, true)
+        wvalue /= 2     ; TODO should be a simple bit shift?
-        txt.spc()
+
-        txt.print_b(x)
+        txt.print_b(bvalue)
        txt.nl()
        txt.print_w(wvalue)
        txt.nl()
        bvalue *= 2
        wvalue *= 2
        txt.print_b(bvalue)
        txt.nl()
        txt.print_w(wvalue)
        txt.nl()
        return x
    }
-    sub wordfunc(word x) -> word {
+    sub unsigned() {
-        txt.print_uwhex(x as uword, true)
+        ubyte @shared ubvalue = 100
-        txt.spc()
+        uword @shared uwvalue = 20000
-        txt.print_w(x)
+
        ubvalue /= 2
        uwvalue /= 2
        txt.print_ub(ubvalue)
        txt.nl()
        txt.print_uw(uwvalue)
        txt.nl()
        ubvalue *= 2
        uwvalue *= 2
        txt.print_ub(ubvalue)
        txt.nl()
        txt.print_uw(uwvalue)
        txt.nl()
        return x
    }
 }
 ;%import math
 ;%import sprites
 ;
 ;main {
 ;    word[128] @split xpos_orig
 ;    word[128] @split ypos_orig
 ;    word[128] xpos
 ;    word[128] ypos
 ;    ubyte[128] tt
 ;
 ;    sub start() {
 ;        cx16.mouse_config2(1)
 ;        sprites.set_mousepointer_hand()
 ;        ubyte sprdat_bank
 ;        uword sprdat_addr
 ;        sprdat_bank, sprdat_addr = sprites.get_data_ptr(0)
 ;
 ;        ubyte sprite
 ;        for sprite in 0 to 127 {
 ;            sprites.init(sprite, sprdat_bank, sprdat_addr, sprites.SIZE_16, sprites.SIZE_16, sprites.COLORS_256, 0)
 ;            xpos_orig[sprite] = sprite*$0003 +100 as word
 ;            ypos_orig[sprite] = sprite*$0002 +100 as word
 ;            tt[sprite] = math.rnd()
 ;        }
 ;
 ;        repeat {
 ;            sys.waitvsync()
 ;            sprites.pos_batch(0, 128, &xpos, &ypos)
 ;            for sprite in 0 to 127 {
 ;                tt[sprite]++
 ;                xpos[sprite] = xpos_orig[sprite] + math.sin8(tt[sprite])
 ;                ypos[sprite] = ypos_orig[sprite] + math.cos8(tt[sprite])
 ;            }
 ;        }
 ;    }
 ;}
 ;
 ;
 ;;%import textio
 ;;%zeropage basicsafe
 ;;%option no_sysinit
 ;;
 ;;main {
 ;;    sub start() {
 ;;        signed()
 ;;        unsigned()
 ;;    }
 ;;
 ;;    sub signed() {
 ;;        byte @shared bvalue = -100
 ;;        word @shared wvalue = -20000
 ;;
 ;;        bvalue /= 2     ; TODO should be a simple bit shift?
 ;;        wvalue /= 2     ; TODO should be a simple bit shift?
 ;;
 ;;        txt.print_b(bvalue)
 ;;        txt.nl()
 ;;        txt.print_w(wvalue)
 ;;        txt.nl()
 ;;
 ;;        bvalue *= 2
 ;;        wvalue *= 2
 ;;
 ;;        txt.print_b(bvalue)
 ;;        txt.nl()
 ;;        txt.print_w(wvalue)
 ;;        txt.nl()
 ;;    }
 ;;
 ;;    sub unsigned() {
 ;;        ubyte @shared ubvalue = 100
 ;;        uword @shared uwvalue = 20000
 ;;
 ;;        ubvalue /= 2
 ;;        uwvalue /= 2
 ;;
 ;;        txt.print_ub(ubvalue)
 ;;        txt.nl()
 ;;        txt.print_uw(uwvalue)
 ;;        txt.nl()
 ;;
 ;;        ubvalue *= 2
 ;;        uwvalue *= 2
 ;;
 ;;        txt.print_ub(ubvalue)
 ;;        txt.nl()
 ;;        txt.print_uw(uwvalue)
 ;;        txt.nl()
 ;;    }
 ;;}