code size optimization: don't copy floats with inlined copy code but use copy_float routine

2024-07-05 06:29:02 +00:00 · 2022-01-09 16:18:13 +01:00 · 2022-01-09 16:18:13 +01:00 · 50c16fe6de
commit 50c16fe6de
parent b092d1a5d3
7 changed files with 64 additions and 108 deletions
--- a/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/ExpressionsAsmGen.kt
+++ b/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/ExpressionsAsmGen.kt
@ -702,7 +702,7 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
                    asmgen.out("  lda  $arrayVarName+$indexValue |  sta  P8ESTACK_LO,x |  lda  $arrayVarName+$indexValue+1 |  sta  P8ESTACK_HI,x |  dex")
                }
                DataType.FLOAT -> {
-                    asmgen.out("  lda  #<$arrayVarName+$indexValue |  ldy  #>$arrayVarName+$indexValue |  jsr  floats.push_float")
+                    asmgen.out("  lda  #<($arrayVarName+$indexValue) |  ldy  #>($arrayVarName+$indexValue) |  jsr  floats.push_float")
                }
                else -> throw AssemblyError("weird element type")
            }
--- a/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/PostIncrDecrAsmGen.kt
+++ b/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/PostIncrDecrAsmGen.kt
@ -83,7 +83,7 @@ internal class PostIncrDecrAsmGen(private val program: Program, private val asmg
 """)
                        }
                        DataType.FLOAT -> {
-                            asmgen.out("  lda  #<$asmArrayvarname+$indexValue |  ldy  #>$asmArrayvarname+$indexValue")
+                            asmgen.out("  lda  #<($asmArrayvarname+$indexValue) |  ldy  #>($asmArrayvarname+$indexValue)")
                            asmgen.out(if(incr) "  jsr  floats.inc_var_f" else "  jsr  floats.dec_var_f")
                        }
                        else -> throw AssemblyError("need numeric type")
--- a/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/assignment/AssignmentAsmGen.kt
+++ b/codeGeneration/src/prog8/codegen/target/cpu6502/codegen/assignment/AssignmentAsmGen.kt
@ -88,7 +88,7 @@ internal class AssignmentAsmGen(private val program: Program, private val asmgen
                            assignRegisterpairWord(assign.target, RegisterOrPair.AY)
                        }
                        DataType.FLOAT -> {
-                            asmgen.out("  lda  #<$arrayVarName+$indexValue |  ldy  #>$arrayVarName+$indexValue")
+                            asmgen.out("  lda  #<($arrayVarName+$indexValue) |  ldy  #>($arrayVarName+$indexValue)")
                            assignFloatFromAY(assign.target)
                        }
                        else ->
@ -950,8 +950,8 @@ $containsLabel      lda  #1
                        }
                        DataType.FLOAT -> {
                            asmgen.out("""
-                                lda  #<${target.asmVarname}+$scaledIdx
-                                ldy  #>${target.asmVarname}+$scaledIdx
+                                lda  #<(${target.asmVarname}+$scaledIdx)
+                                ldy  #>(${target.asmVarname}+$scaledIdx)
                                jsr  floats.pop_float
                            """)
                        }
@ -1163,8 +1163,8 @@ $containsLabel      lda  #1
                                ldy  #>$sourceName
                                sta  P8ZP_SCRATCH_W1
                                sty  P8ZP_SCRATCH_W1+1
-                                lda  #<${target.asmVarname}+$scaledIdx
-                                ldy  #>${target.asmVarname}+$scaledIdx
+                                lda  #<(${target.asmVarname}+$scaledIdx)
+                                ldy  #>(${target.asmVarname}+$scaledIdx)
                                jsr  floats.copy_float
                            """)
                        }
@ -1316,17 +1316,13 @@ $containsLabel      lda  #1
        when(target.kind) {
            TargetStorageKind.VARIABLE -> {
                asmgen.out("""
-                    lda  $sourceName
-                    sta  ${target.asmVarname}
-                    lda  $sourceName+1
-                    sta  ${target.asmVarname}+1
-                    lda  $sourceName+2
-                    sta  ${target.asmVarname}+2
-                    lda  $sourceName+3
-                    sta  ${target.asmVarname}+3
-                    lda  $sourceName+4
-                    sta  ${target.asmVarname}+4
-                """)
+                    lda  #<$sourceName
+                    ldy  #>$sourceName
+                    sta  P8ZP_SCRATCH_W1
+                    sty  P8ZP_SCRATCH_W1+1
+                    lda  #<${target.asmVarname}
+                    ldy  #>${target.asmVarname}
+                    jsr  floats.copy_float""")
            }
            TargetStorageKind.ARRAY -> {
                asmgen.out("""
@ -2047,17 +2043,13 @@ $containsLabel      lda  #1
            when(target.kind) {
                TargetStorageKind.VARIABLE -> {
                    asmgen.out("""
-                            lda  $constFloat
-                            sta  ${target.asmVarname}
-                            lda  $constFloat+1
-                            sta  ${target.asmVarname}+1
-                            lda  $constFloat+2
-                            sta  ${target.asmVarname}+2
-                            lda  $constFloat+3
-                            sta  ${target.asmVarname}+3
-                            lda  $constFloat+4
-                            sta  ${target.asmVarname}+4
-                        """)
+                        lda  #<$constFloat
+                        ldy  #>$constFloat
+                        sta  P8ZP_SCRATCH_W1
+                        sty  P8ZP_SCRATCH_W1+1
+                        lda  #<${target.asmVarname}
+                        ldy  #>${target.asmVarname}
+                        jsr  floats.copy_float""")
                }
                TargetStorageKind.ARRAY -> {
                    val arrayVarName = target.asmVarname
@ -2065,17 +2057,13 @@ $containsLabel      lda  #1
                    if (constIndex!=null) {
                        val indexValue = constIndex * program.memsizer.memorySize(DataType.FLOAT)
                        asmgen.out("""
-                            lda  $constFloat
-                            sta  $arrayVarName+$indexValue
-                            lda  $constFloat+1
-                            sta  $arrayVarName+$indexValue+1
-                            lda  $constFloat+2
-                            sta  $arrayVarName+$indexValue+2
-                            lda  $constFloat+3
-                            sta  $arrayVarName+$indexValue+3
-                            lda  $constFloat+4
-                            sta  $arrayVarName+$indexValue+4
-                        """)
+                            lda  #<$constFloat
+                            ldy  #>$constFloat
+                            sta  P8ZP_SCRATCH_W1
+                            sty  P8ZP_SCRATCH_W1+1
+                            lda  #<($arrayVarName+$indexValue)
+                            ldy  #>($arrayVarName+$indexValue)
+                            jsr  floats.copy_float""")
                    } else {
                        val asmvarname = asmgen.asmVariableName(target.array.indexer.indexExpr as IdentifierReference)
                        asmgen.out("""
--- a/compiler/res/prog8lib/c64/floats.asm
+++ b/compiler/res/prog8lib/c64/floats.asm
@ -248,15 +248,25 @@ pop_float_fac1	.proc
 		.pend

 copy_float	.proc
-		; -- copies the 5 bytes of the mflt value pointed to by SCRATCH_ZPWORD1,
+		; -- copies the 5 bytes of the mflt value pointed to by P8ZP_SCRATCH_W1,
 		;    into the 5 bytes pointed to by A/Y.  Clobbers A,Y.
-		sta  _target+1
-		sty  _target+2
-		ldy  #4
-_loop		lda  (P8ZP_SCRATCH_W1),y
-_target		sta  $ffff,y			; modified
-		dey
-		bpl  _loop
+		sta  P8ZP_SCRATCH_W2		; destination pointer, low byte (note: P8ZP_SCRATCH_W2 is overwritten by this routine)
+		sty  P8ZP_SCRATCH_W2+1		; destination pointer, high byte
+		ldy  #0				; Y = byte offset within the float, counts 0..4
+		lda  (P8ZP_SCRATCH_W1),y	; copy byte 0
+		sta  (P8ZP_SCRATCH_W2),y
+		iny
+		lda  (P8ZP_SCRATCH_W1),y	; copy byte 1
+		sta  (P8ZP_SCRATCH_W2),y
+		iny
+		lda  (P8ZP_SCRATCH_W1),y	; copy byte 2
+		sta  (P8ZP_SCRATCH_W2),y
+		iny
+		lda  (P8ZP_SCRATCH_W1),y	; copy byte 3
+		sta  (P8ZP_SCRATCH_W2),y
+		iny
+		lda  (P8ZP_SCRATCH_W1),y	; copy byte 4 (last byte; Y is left at 4)
+		sta  (P8ZP_SCRATCH_W2),y
 		rts
 		.pend

--- a/compiler/test/TestCompilerOnRanges.kt
+++ b/compiler/test/TestCompilerOnRanges.kt
@ -60,7 +60,7 @@ class TestCompilerOnRanges: FunSpec({
    test("testFloatArrayInitializerWithRange_char_to_char") {
        val platform = C64Target
        val result = compileText(platform, optimize = false, """
-            %option enable_floats
+            %import floats
            main {
                sub start() {
                    float[] cs = 'a' to 'z' ; values are computed at compile time 
@ -92,9 +92,9 @@ class TestCompilerOnRanges: FunSpec({
    context("floatArrayInitializerWithRange") {
        val combos = cartesianProduct(
            listOf("", "42", "41"),                 // sizeInDecl
-            listOf("%option enable_floats", ""),    // optEnableFloats
+            listOf("%import floats", ""),           // optEnableFloats
            listOf(Cx16Target, C64Target),          // platform
-            listOf(false, true)                    // optimize
+            listOf(false, true)                     // optimize
        )

        combos.forEach {
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@ -3,9 +3,6 @@ TODO

 For next compiler release (7.7)
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
- copying floats around: do it with a subroutine rather than 5 lda/sta pairs .
-  is slower but floats are very slow already anyway and this should take a lot less program size.
-  -> assignVariableFloat()  + assignConstantFloat()
 - don't remove dead variable assignment if it contains a function call
 - optimize codegen of pipe operator to avoid needless assigns to temp var
 - why is this using stack evaluation:  bb = ww>0  (if ww>0 is not using stack!)
--- a/examples/test.p8
+++ b/examples/test.p8
@ -5,57 +5,19 @@

 main {
    sub start() {
-        byte bb = 0
-        word ww = 0
-        float fl = 0
+        float fl = 64
+        float[] farr =[1.111,2.222,3.333]
+        fl = farr[0]
+        floats.print_f(fl)
+        fl = farr[1]
+        floats.print_f(fl)
+        farr[0] = 9.999
+        fl = farr[0]
+        floats.print_f(fl)

-        ubyte @shared ub

-        if ww==0 {
-            ub++
-        }
-        if ww!=0 {
-            ub++
-        }
-        if ww>0 {
-            ub++
-        }
-        if ww<0 {
-            ub++
-        }
-        if ww<=0 {
-            ub++
-        }
-        if ww>=0 {
-            ub++
-        }
-
-;        if fl< 0 {
-;            txt.print("wrong fl\n")
-;        }
-;        fl=-1.111
-;        if 0>fl or fl==2 {
-;            txt.print("good fl\n")
-;        }
-;
-;        if ww< 0 {
-;            txt.print("wrong ww\n")
-;        }
-;        if bb<0 {
-;            txt.print("wrong bb\n")
-;        }
-;        bb = -1
-;        ww = -1111
-;        if 0>ww or ww==2 {
-;            txt.print("good ww\n")
-;        }
-;        if 0>bb or bb==2 {
-;            txt.print("good bb\n")
-;        }
-;        float @shared f1
-;
-;        f1 =   1.234 |> addfloat1 |> addfloat2 |> addfloat3         ; TODO fix that the value is actually returned
-;        floats.print_f(f1)
+;        fl =   1.234 |> addfloat1 |> addfloat2 |> addfloat3
+;        floats.print_f(fl)
 ;        txt.nl()
 ;        1.234 |> addfloat1
 ;            |> addfloat2 |> addfloat3 |> floats.print_f
@ -69,7 +31,6 @@ main {
 ;        txt.nl()

 ;        test_stack.test()
-;         ; TODO fix that the value is actually returned (398) and that X register is preserved:
 ;        uword @shared uw=  9+3 |> assemblything
 ;                             |> sin8u
 ;                             |> add_one
@ -96,12 +57,12 @@ main {
    }

    sub times_two(ubyte input) -> uword {
-        return input*$0002
+        return input*$0002      ; double the input; the multiplier must stay 2 to match the name and fit the uword result
    }

    asmsub assemblything(ubyte input @A) clobbers(X,Y) -> ubyte @A {
        %asm {{
-            ldx #0
+            ldx #64
            asl a
            rts
        }}
@ -121,7 +82,7 @@ main {
 ;            jsr  floats.FOUT
 ;            sta  $7e
 ;            sty  $7f
-;            ldy  #0
+;            ldy  #0
 ;_loop
 ;            lda  ($7e),y
 ;            beq  _done
@ -131,7 +92,7 @@ main {
 ;_done
 ;            rts
 ;
-;float5_111	.byte  $81, $0e, $14, $7a, $e1  ; float 1.11
+;float5_111	.byte  $81, $0e, $14, $7a, $e1  ; float 1.11
 ;float5_122	.byte  $81, $1c, $28, $f5, $c2  ; float 1.22
 ;
 ;        }}