From f249ccd41459ce11cf196c376a527d19678b2004 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Sun, 14 Aug 2022 12:50:46 +0200
Subject: [PATCH] added asm optimization for same pointer index

---
 .../src/prog8/codegen/cpu6502/AsmOptimizer.kt | 49 +++++++++++++++++++
 docs/source/todo.rst                          |  6 ---
 2 files changed, 49 insertions(+), 6 deletions(-)
diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
index 5deeb5406..f83fe53d9 100644
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/AsmOptimizer.kt
@@ -59,6 +59,13 @@ internal fun optimizeAssembly(lines: MutableList<String>, machine: IMachineDefin
         numberOfOptimizations++
     }
 
+    mods = optimizeSamePointerIndexing(linesByFourteen, machine, program)
+    if(mods.isNotEmpty()) {
+        apply(mods, lines)
+        linesByFourteen = getLinesBy(lines, 14)
+        numberOfOptimizations++
+    }
+
     // TODO more assembly peephole optimizations
 
     return numberOfOptimizations
@@ -320,6 +327,48 @@ private fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<Stri
     return mods
 }
 
+private fun optimizeSamePointerIndexing(linesByFourteen: List<List<IndexedValue<String>>>, machine: IMachineDefinition, program: Program): List<Modification> {
+
+    // Optimize same pointer indexing where for instance we load and store to the same ptr index in Y;
+    // if Y isn't modified in between we can omit the second LDY:
+    //    ldy  #0
+    //    lda  (ptr),y
+    //    ora  #3       ; <-- one or two instructions that don't modify Y
+    //    ldy  #0       ; <-- can be removed
+    //    sta  (ptr),y
+    // The pattern is matched at the start of each window; only the first six lines are inspected.
+    // Returns one Modification per redundant LDY found; 'machine' and 'program' are not used here.
+
+    // Operand text following the mnemonic. drop() yields "" for a bare 3-character line,
+    // where substring(4) would throw StringIndexOutOfBoundsException.
+    fun operand(instr: String): String = instr.drop(4)
+
+    // Conservative check that an instruction leaves Y alone: any line mentioning 'y' is assumed
+    // to touch it, and a JSR is rejected too because the called routine may change Y.
+    fun preservesY(instr: String): Boolean = "y" !in instr && !instr.startsWith("jsr")
+
+    val mods = mutableListOf<Modification>()
+    for (lines in linesByFourteen) {
+        if (lines.size < 6) continue   // too short to hold the longest pattern
+        val instrs = lines.take(6).map { it.value.trimStart() }
+        val (indexLoad, pointerLoad) = instrs
+        if (!indexLoad.startsWith("ldy") || !pointerLoad.startsWith("lda")) continue
+
+        // gap = number of instructions between the LDA and the second LDY
+        for (gap in 1..2) {
+            val reload = instrs[gap + 2]
+            val store = instrs[gap + 3]
+            val sameIndex = reload.startsWith("ldy") && operand(reload) == operand(indexLoad)
+            val sameTarget = store.startsWith("sta") && operand(store) == operand(pointerLoad) && operand(store).endsWith(",y")
+            if (sameIndex && sameTarget && instrs.subList(2, gap + 2).all { preservesY(it) }) {
+                mods.add(Modification(lines[gap + 2].index, true, null))   // the redundant second LDY
+            }
+        }
+    }
+
+    return mods
+}
+
 private fun optimizeStoreLoadSame(linesByFour: List<List<IndexedValue<String>>>, machine: IMachineDefinition, program: Program): List<Modification> {
     // sta X + lda X,  sty X + ldy X,   stx X + ldx X  -> the second instruction can OFTEN be eliminated
     val mods = mutableListOf<Modification>()
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 1559ea733..f504db8e2 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -3,12 +3,6 @@ TODO
 
 For next release
 ^^^^^^^^^^^^^^^^
-- @(ptr) |= 3 -> asm peephole optimize remove the second ldy  if the instruction before doesn't modify y
-	ldy  #0
-	lda  (starfieldPtr2),y
-	ora  #3
-	ldy  #0
-	sta  (starfieldPtr2),y
 - vm: intermediate code: don't flatten everything. Instead, as a new intermediary step,
   convert the new Ast into *structured* intermediary code.
   Basically keep the blocks and subroutines structure, including full subroutine signature information,