fix word multiplication to not clobber r0 and r1 anymore

This was causing corruption in certain programs, such as the cx16/amiga example. The problem was introduced in 9.4 with the new multiply_words routine, which kept its multiplier and 4-byte result in cx16.r0 (and therefore also r1).
2025-07-25 11:24:15 +00:00 · 2023-09-02 12:02:07 +02:00
parent 3b786c819d
commit e97303c226
5 changed files with 65 additions and 45 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
@@ -543,7 +543,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                    return true
                }
                in WordDatatypes -> {
-                    asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
+                    asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
                    asmgen.out("  jsr  math.multiply_words")
                    assignRegisterpairWord(target, RegisterOrPair.AY)
                    return true
@@ -567,7 +567,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
                        asmgen.out("  jsr  math.mul_word_${value}")
                    }
                    else {
-                        asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
+                        asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
                        asmgen.out("  jsr  math.multiply_words")
                    }
                    assignRegisterpairWord(target, RegisterOrPair.AY)
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
@@ -1335,9 +1335,9 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                } else {
                    asmgen.out("""
                        lda  $name
-                        sta  cx16.r0
+                        sta  math.multiply_words.multiplier
                        lda  $name+1
-                        sta  cx16.r0+1
+                        sta  math.multiply_words.multiplier+1
                        lda  #<$value
                        ldy  #>$value
                        jsr  math.multiply_words
@@ -1786,15 +1786,15 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                    }
                    "*" -> {
                        if(valueDt==DataType.UBYTE) {
-                            asmgen.out("  lda  $otherName |  sta  cx16.r0")
+                            asmgen.out("  lda  $otherName |  sta  math.multiply_words.multiplier")
                            if(asmgen.isTargetCpu(CpuType.CPU65c02))
-                                asmgen.out("  stz  cx16.r0+1")
+                                asmgen.out("  stz  math.multiply_words.multiplier+1")
                            else
-                                asmgen.out("  lda  #0 |  sta  cx16.r0+1")
+                                asmgen.out("  lda  #0 |  sta  math.multiply_words.multiplier+1")
                        } else {
                            asmgen.out("  lda  $otherName")
                            asmgen.signExtendAYlsb(valueDt)
-                            asmgen.out("  sta  cx16.r0 |  sty  cx16.r0+1")
+                            asmgen.out("  sta  math.multiply_words.multiplier |  sty  math.multiply_words.multiplier+1")
                        }
                        asmgen.out("""
                                lda  $name
@@ -1930,31 +1930,16 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
                    "+" -> asmgen.out("  lda  $name |  clc |  adc  $otherName |  sta  $name |  lda  $name+1 |  adc  $otherName+1 |  sta  $name+1")
                    "-" -> asmgen.out("  lda  $name |  sec |  sbc  $otherName |  sta  $name |  lda  $name+1 |  sbc  $otherName+1 |  sta  $name+1")
                    "*" -> {
-                        if(otherName=="cx16.r0")
-                            asmgen.out("""
-                                lda  $name
-                                ldy  $name+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
-                        else if(name=="cx16.r0")
-                            asmgen.out("""
-                                lda  $otherName
-                                ldy  $otherName+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
-                        else
-                            asmgen.out("""
-                                lda  $otherName
-                                ldy  $otherName+1
-                                sta  cx16.r0
-                                sty  cx16.r0+1
-                                lda  $name
-                                ldy  $name+1
-                                jsr  math.multiply_words
-                                sta  $name
-                                sty  $name+1""")
+                        asmgen.out("""
+                            lda  $otherName
+                            ldy  $otherName+1
+                            sta  math.multiply_words.multiplier
+                            sty  math.multiply_words.multiplier+1
+                            lda  $name
+                            ldy  $name+1
+                            jsr  math.multiply_words
+                            sta  $name
+                            sty  $name+1""")
                    }
                    "/" -> {
                        if(dt==DataType.WORD) {
@@ -2135,8 +2120,8 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
    private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
        fun multiplyVarByWordInAY() {
            asmgen.out("""
-                sta  cx16.r0
-                sty  cx16.r0+1
+                sta  math.multiply_words.multiplier
+                sty  math.multiply_words.multiplier+1
                lda  $name
                ldy  $name+1
                jsr  math.multiply_words
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@@ -56,9 +56,12 @@ _multiplier      = P8ZP_SCRATCH_REG

 multiply_words	.proc
 	; -- multiply two 16-bit words into a 32-bit result  (signed and unsigned)
-	;      input: A/Y = first 16-bit number, cx16.R0 = second 16-bit number
-	;      output: multiply_words.result == cx16.R0:R1, 4-bytes/32-bits product, LSB order (low-to-high)  low 16 bits also in AY.
-	; TODO: should not use R0 and R1 at all !!!  result needs 4 consecutive bytes, so it can't be in zeropage at all...
+	;      input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number
+	;      output: multiply_words.result, 4-bytes/32-bits product, LSB order (low-to-high)  low 16 bits also in AY.
+
+	; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array.
+	;       this routine could be faster if we could stick that into zeropage,
+	;       but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...

 ; mult62.a
 ; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
@@ -73,9 +76,8 @@ multiply_words	.proc
 ; Average cycles:
 ; 93 bytes

-_multiplicand    = P8ZP_SCRATCH_W1   ; 2 bytes
-_multiplier      = cx16.r0   ; 2 bytes
-result           = cx16.r0   ; 4 bytes   (note: shares memory with multiplier)  so is r0 and ALSO r1.
+_multiplicand    = P8ZP_SCRATCH_W2   ; 2 bytes
+multiplier      = result

 ; 16 bit x 16 bit unsigned multiply, 32 bit result
 ;
@@ -175,6 +177,9 @@ _inner_loop2
    lda  result
    ldy  result+1
    rts
+
+result		.byte  0,0,0,0
+
 		.pend


--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,10 +1,8 @@
 TODO
 ====
+- fix on c64 target: examples/cube3d-float (broken since 9.3 with the evalstack removal)  it works on x16 target, oddly enough.
+  More detailed and simpler code for this problem in floatproblem64.p8

- fix: amiga example with noopt draws wrong lines, caused by "2x faster word multiplication routine" because it trashes r0 and r1 now
-  multiply_words in math.asm needs fixing.
-
- fix: test all other things with noopt once again!  (examples/c64 are all ok)
 - fix: search for TODO("swap operand order")
 - optimize: search for TODO optimize: don't use scratch var
 - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?
--- a/floatproblem64.p8
+++ b/floatproblem64.p8
@@ -0,0 +1,32 @@
+%import textio
+%import floats
+%zeropage dontuse
+
+main {
+    sub start()  {
+        float value1 = -0.8
+        float value2 = 0.3
+        float two = 2.0
+
+        float result = value1*two + value2*two  ; TODO FIX: invalid result on c64, ok when the *two is removed or expression is split (it's not caused by pushFAC1/popFAC1)
+        floats.print_f(result)
+        txt.nl()
+        txt.print("-1 was expected\n\n")       ; on C64: -1.1 is printed :(
+
+        result = value2*two + value1*two        ; swapped operands around, now it's suddenly fine on C64...
+        floats.print_f(result)
+        txt.nl()
+        txt.print("-1 was expected\n\n")       ; on C64: correct value is printed
+
+
+        value1 = 0.8
+        value2 = 0.3
+        result = value1*two + value2*two
+        floats.print_f(result)
+        txt.nl()
+        txt.print("2.2 was expected\n\n")       ; on C64: correct value is printed
+
+        repeat {
+        }
+    }
+}