From e97303c226bafba0a3e6e490ff9fc1cef65f0da3 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Sat, 2 Sep 2023 12:02:07 +0200
Subject: [PATCH] fix word multiplication to not clobber r0 and r1 anymore

This was causing corruption in certain programs such as the cx16/amiga example.
The problem was introduced in 9.4 with the new multiply_words routine
---
 .../cpu6502/assignment/AssignmentAsmGen.kt    |  4 +-
 .../assignment/AugmentableAssignmentAsmGen.kt | 51 +++++++------------
 compiler/res/prog8lib/math.asm                | 17 ++++---
 docs/source/todo.rst                          |  6 +--
 floatproblem64.p8                             | 32 ++++++++++++
 5 files changed, 65 insertions(+), 45 deletions(-)
 create mode 100644 floatproblem64.p8

diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
index 7897110b9..052f01fb5 100644
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
@@ -543,7 +543,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                     return true
                 }
                 in WordDatatypes -> {
-                    asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
+                    asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
                     asmgen.out("  jsr  math.multiply_words")
                     assignRegisterpairWord(target, RegisterOrPair.AY)
                     return true
@@ -567,7 +567,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                         asmgen.out("  jsr  math.mul_word_${value}")
                     }
                     else {
-                        asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
+                        asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
                         asmgen.out("  jsr  math.multiply_words")
                     }
                     assignRegisterpairWord(target, RegisterOrPair.AY)
diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
index 0e9812e64..3e43feec4 100644
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
@@ -1335,9 +1335,9 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                 } else {
                     asmgen.out("""
                         lda  $name
-                        sta  cx16.r0
+                        sta  math.multiply_words.multiplier
                         lda  $name+1
-                        sta  cx16.r0+1
+                        sta  math.multiply_words.multiplier+1
                         lda  #<$value
                         ldy  #>$value
                         jsr  math.multiply_words
@@ -1786,15 +1786,15 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                     }
                     "*" -> {
                         if(valueDt==DataType.UBYTE) {
-                            asmgen.out("  lda  $otherName |  sta  cx16.r0")
+                            asmgen.out("  lda  $otherName |  sta  math.multiply_words.multiplier")
                             if(asmgen.isTargetCpu(CpuType.CPU65c02))
-                                asmgen.out("  stz  cx16.r0+1")
+                                asmgen.out("  stz  math.multiply_words.multiplier+1")
                             else
-                                asmgen.out("  lda  #0 |  sta  cx16.r0+1")
+                                asmgen.out("  lda  #0 |  sta  math.multiply_words.multiplier+1")
                         } else {
                             asmgen.out("  lda  $otherName")
                             asmgen.signExtendAYlsb(valueDt)
-                            asmgen.out("  sta  cx16.r0 |  sty  cx16.r0+1")
+                            asmgen.out("  sta  math.multiply_words.multiplier |  sty  math.multiply_words.multiplier+1")
                         }
                         asmgen.out("""
                                 lda  $name
@@ -1930,31 +1930,16 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                     "+" -> asmgen.out("  lda  $name |  clc |  adc  $otherName |  sta  $name |  lda  $name+1 |  adc  $otherName+1 |  sta  $name+1")
                     "-" -> asmgen.out("  lda  $name |  sec |  sbc  $otherName |  sta  $name |  lda  $name+1 |  sbc  $otherName+1 |  sta  $name+1")
                     "*" -> {
-                        if(otherName=="cx16.r0")
-                            asmgen.out("""
-                                lda  $name
-                                ldy  $name+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
-                        else if(name=="cx16.r0")
-                            asmgen.out("""
-                                lda  $otherName
-                                ldy  $otherName+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
-                        else
-                            asmgen.out("""
-                                lda  $otherName
-                                ldy  $otherName+1
-                                sta  cx16.r0
-                                sty  cx16.r0+1
-                                lda  $name
-                                ldy  $name+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
+                        asmgen.out("""
+                            lda  $otherName
+                            ldy  $otherName+1
+                            sta  math.multiply_words.multiplier
+                            sty  math.multiply_words.multiplier+1
+                            lda  $name
+                            ldy  $name+1
+                            jsr  math.multiply_words
+                            sta  $name
+                            sty  $name+1""")
                     }
                     "/" -> {
                         if(dt==DataType.WORD) {
@@ -2135,8 +2120,8 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
     private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
         fun multiplyVarByWordInAY() {
             asmgen.out("""
-                sta  cx16.r0
-                sty  cx16.r0+1
+                sta  math.multiply_words.multiplier
+                sty  math.multiply_words.multiplier+1
                 lda  $name
                 ldy  $name+1
                 jsr  math.multiply_words
diff --git a/compiler/res/prog8lib/math.asm b/compiler/res/prog8lib/math.asm
index a7c876992..2b63d1fd8 100644
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@@ -56,9 +56,12 @@ _multiplier      = P8ZP_SCRATCH_REG
 
 multiply_words	.proc
 	; -- multiply two 16-bit words into a 32-bit result  (signed and unsigned)
-	;      input: A/Y = first 16-bit number, cx16.R0 = second 16-bit number
-	;      output: multiply_words.result == cx16.R0:R1, 4-bytes/32-bits product, LSB order (low-to-high)  low 16 bits also in AY.
-	; TODO: should not use R0 and R1 at all !!!  result needs 4 consecutive bytes, so it can't be in zeropage at all...
+	;      input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number
+	;      output: multiply_words.result, 4-bytes/32-bits product, LSB order (low-to-high)  low 16 bits also in AY.
+
+	; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array.
+	;       this routine could be faster if we could stick that into zeropage,
+	;       but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...
 
 ; mult62.a
 ; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
@@ -73,9 +76,8 @@ multiply_words	.proc
 ; Average cycles:
 ; 93 bytes
 
-_multiplicand    = P8ZP_SCRATCH_W1   ; 2 bytes
-_multiplier      = cx16.r0   ; 2 bytes
-result           = cx16.r0   ; 4 bytes   (note: shares memory with multiplier)  so is r0 and ALSO r1.
+_multiplicand    = P8ZP_SCRATCH_W2   ; 2 bytes
+multiplier      = result
 
 ; 16 bit x 16 bit unsigned multiply, 32 bit result
 ;
@@ -175,6 +177,9 @@ _inner_loop2
     lda  result
     ldy  result+1
     rts
+
+result		.byte  0,0,0,0
+
 		.pend
 
 
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index b846a5398..aa42ffb74 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,10 +1,8 @@
 TODO
 ====
+- fix on c64 target: examples/cube3d-float (broken since 9.3 with the evalstack removal)  it works on x16 target, oddly enough.
+  More detailed and simpler code for this problem in floatproblem64.p8
 
-- fix: amiga example with noopt draws wrong lines, caused by "2x faster word multiplication routine" because it trashes r0 and r1 now
-  multiply_words in math.asm needs fixing.
-
-- fix: test all other things with noopt once again!  (examples/c64 are all ok)
 - fix: search for TODO("swap operand order")
 - optimize: search for TODO optimize: don't use scratch var
 - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?
diff --git a/floatproblem64.p8 b/floatproblem64.p8
new file mode 100644
index 000000000..746f071ab
--- /dev/null
+++ b/floatproblem64.p8
@@ -0,0 +1,32 @@
+%import textio
+%import floats
+%zeropage dontuse
+
+main {
+    sub start()  {
+        float value1 = -0.8
+        float value2 = 0.3
+        float two = 2.0
+
+        float result = value1*two + value2*two  ; TODO FIX: invalid result on c64, ok when the *two is removed or expression is split (it's not caused by pushFAC1/popFAC1)
+        floats.print_f(result)
+        txt.nl()
+        txt.print("-1 was expected\n\n")       ; on C64: -1.1 is printed :(
+
+        result = value2*two + value1*two        ; swapped operands around, now it's suddenly fine on C64...
+        floats.print_f(result)
+        txt.nl()
+        txt.print("-1 was expected\n\n")       ; on C64: correct value is printed
+
+
+        value1 = 0.8
+        value2 = 0.3
+        result = value1*two + value2*two
+        floats.print_f(result)
+        txt.nl()
+        txt.print("2.2 was expected\n\n")       ; on C64: correct value is printed
+
+        repeat {
+        }
+    }
+}