From 76aeb06c9748f0aa153970a7b7c6e9c82a8d3fd6 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Wed, 16 Jan 2019 23:56:50 +0100
Subject: [PATCH] float init optimization in asm

---
 compiler/src/prog8/compiler/Compiler.kt       |   4 +-
 .../prog8/compiler/target/c64/AsmOptimizer.kt |  24 +++-
 examples/test.p8                              | 131 ++++++++++++++----
 3 files changed, 128 insertions(+), 31 deletions(-)
diff --git a/compiler/src/prog8/compiler/Compiler.kt b/compiler/src/prog8/compiler/Compiler.kt
index f5eebadd2..ffd0d335c 100644
--- a/compiler/src/prog8/compiler/Compiler.kt
+++ b/compiler/src/prog8/compiler/Compiler.kt
@@ -654,13 +654,13 @@ private class StatementTranslator(private val prog: IntermediateProgram,
                     DataType.STR, DataType.STR_P, DataType.STR_S, DataType.STR_PS -> {
                         if(lv.heapId==null)
                             throw CompilerException("string should have been moved into heap   ${lv.position}")
-                        prog.instr(Opcode.PUSH_ADDR_HEAPVAR, callLabel = "@todo-string-varname?")    // XXX  push address of string
+                        TODO("push address of string with PUSH_ADDR_HEAPVAR")
                     }
                     DataType.ARRAY_UB, DataType.ARRAY_UW, DataType.ARRAY_F,
                     DataType.ARRAY_B, DataType.ARRAY_W -> {
                         if(lv.heapId==null)
                             throw CompilerException("array should have been moved into heap  ${lv.position}")
-                        prog.instr(Opcode.PUSH_WORD, Value(lv.type, lv.heapId))     // XXX  push address of array
+                        TODO("push address of array with PUSH_WORD")
                     }
                 }
             }
diff --git a/compiler/src/prog8/compiler/target/c64/AsmOptimizer.kt b/compiler/src/prog8/compiler/target/c64/AsmOptimizer.kt
index 677d01910..cc7a7d7c8 100644
--- a/compiler/src/prog8/compiler/target/c64/AsmOptimizer.kt
+++ b/compiler/src/prog8/compiler/target/c64/AsmOptimizer.kt
@@ -37,6 +37,7 @@ fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<String>>>):
 
     // optimize sequential assignments of the same value to various targets (bytes, words, floats)
     // the float one is the one that requires 2*7=14 lines of code to check...
+    // @todo it would be better to do this in the Compiler, working on opcodes, so that the inefficient asm is never generated in the first place.
 
     val removeLines = mutableListOf<Int>()
     for (pair in linesByFourteen) {
@@ -71,7 +72,28 @@ fun optimizeSameAssignments(linesByFourteen: List<List<IndexedValue<String>>>):
             }
         }
 
-        // @todo check float initializations.
+        if(first.startsWith("lda") && second.startsWith("ldy") && third.startsWith("sta") && fourth.startsWith("sty") &&
+                fifth.startsWith("lda") && sixth.startsWith("ldy") && seventh.startsWith("jsr  c64flt.copy_float")) {
+
+            val nineth = pair[8].value.trimStart()
+            val tenth = pair[9].value.trimStart()
+            val eleventh = pair[10].value.trimStart()
+            val twelveth = pair[11].value.trimStart()
+            val thirteenth = pair[12].value.trimStart()
+            val fourteenth = pair[13].value.trimStart()
+
+            if(eighth.startsWith("lda") && nineth.startsWith("ldy") && tenth.startsWith("sta") && eleventh.startsWith("sty") &&
+                    twelveth.startsWith("lda") && thirteenth.startsWith("ldy") && fourteenth.startsWith("jsr  c64flt.copy_float")) {
+
+                if(first.substring(4) == eighth.substring(4) && second.substring(4)==nineth.substring(4)) {
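+                    // identical float init: the second sequence starts with the same lda/ldy operands as the first, so drop its first four lines (lda, ldy, sta, sty)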
+                    removeLines.add(pair[7].index)
+                    removeLines.add(pair[8].index)
+                    removeLines.add(pair[9].index)
+                    removeLines.add(pair[10].index)
+                }
+            }
+        }
     }
     return removeLines
 }
diff --git a/examples/test.p8 b/examples/test.p8
index cbd1f104a..082867d3d 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -5,37 +5,112 @@
 
     sub start()  {
 
-        ubyte ub
-        byte b
-        word w
-        uword uw
+        ubyte ub1
+        ubyte ub2
+        ubyte ub3
+        ubyte ub4
+        byte b1
+        byte b2
+        byte b3
+        byte b4
+        word w1
+        word w2
+        word w3
+        word w4
+        uword uw1
+        uword uw2
+        uword uw3
+        uword uw4
+        float f1
+        float f2
+        float f3
+        float f4
+        memory ubyte mub1 = $c000
+        memory ubyte mub2 = $c000
+        memory ubyte mub3 = $c000
+        memory ubyte mub4 = $c000
+        memory byte mb1 = $c000
+        memory byte mb2 = $c000
+        memory byte mb3 = $c000
+        memory byte mb4 = $c000
+        memory word mw1 = $c000
+        memory word mw2 = $c000
+        memory word mw3 = $c000
+        memory word mw4 = $c000
+        memory uword muw1 = $c000
+        memory uword muw2 = $c000
+        memory uword muw3 = $c000
+        memory uword muw4 = $c000
+        memory float mf1 = $c010
+        memory float mf2 = $c020
+        memory float mf3 = $c030
+        memory float mf4 = $c040
 
+        ub1 = $11
+        ub2 = $11
+        ub3 = $11
+        mub1 = $11
+        mub2 = $11
+        mub3 = $11
+        ub4 = $44
+        mub4 = $44
 
-        ubyte[2] uba
-        byte[2] ba
-        word[2] wa
-        uword[2] uwa
-        str s
-        str_p sp
-        str_s ss
-        str_ps sps
+        b1=$11
+        b2=$11
+        b3=$11
+        mb1=$11
+        mb2=$11
+        mb3=$11
+        b4=$44
+        mb4=$44
 
-        s = ub as str
-        sp = ub as str_p
-        ss = ub as str_s
-        sps = ub as str_ps
-        s = b as str
-        sp = b as str_p
-        ss = b as str_s
-        sps = b as str_ps
-        s = w as str
-        sp = w as str_p
-        ss = w as str_s
-        sps = w as str_ps
-        s = uw as str
-        sp = uw as str_p
-        ss = uw as str_s
-        sps = uw as str_ps
+        w1=$1111
+        w2=$1111
+        w3=$1111
+        mw1=$1111
+        mw2=$1111
+        mw3=$1111
+        w4=$4444
+        mw4=$4444
+
+        uw1=$1111
+        uw2=$1111
+        uw3=$1111
+        muw1=$1111
+        muw2=$1111
+        muw3=$1111
+        uw4=$4444
+        muw4=$4444
+
+        f1 = 12.11
+        f1 = 13.11
+        f1 = 14.11
+        f1 = 15.11
+        f1 = 11.11
+        f2 = 11.11
+        f3 = 11.11
+        mf1 = 11.11
+        mf2 = 11.11
+        mf3 = 11.11
+        f4 = 44.44
+        mf4 = 44.44
+
+        c64flt.print_f(f1)
+        c64.CHROUT('\n')
+        c64flt.print_f(f2)
+        c64.CHROUT('\n')
+        c64flt.print_f(f3)
+        c64.CHROUT('\n')
+        c64flt.print_f(f4)
+        c64.CHROUT('\n')
+        c64flt.print_f(mf1)
+        c64.CHROUT('\n')
+        c64flt.print_f(mf2)
+        c64.CHROUT('\n')
+        c64flt.print_f(mf3)
+        c64.CHROUT('\n')
+        c64flt.print_f(mf4)
+        c64.CHROUT('\n')
 
 
     }