From cd400886366c73bda22a5d293c6fc11962753f65 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Thu, 28 Sep 2023 03:18:49 +0200
Subject: [PATCH] vm: added math.mul16_last_upper()

---
 compiler/res/prog8lib/math.p8                 |   7 +-
 compiler/res/prog8lib/virtual/math.p8         | 115 ++++++++++--------
 docs/source/todo.rst                          |   1 -
 examples/test.p8                              |   4 +-
 virtualmachine/src/prog8/vm/SysCalls.kt       |   7 +-
 virtualmachine/src/prog8/vm/VirtualMachine.kt |  37 ++++--
 6 files changed, 104 insertions(+), 67 deletions(-)

diff --git a/compiler/res/prog8lib/math.p8 b/compiler/res/prog8lib/math.p8
index 1500fa824..e90f66e68 100644
--- a/compiler/res/prog8lib/math.p8
+++ b/compiler/res/prog8lib/math.p8
@@ -141,8 +141,13 @@ _sinecosR8	.char  trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
     }
 
     asmsub mul16_last_upper() -> uword @AY {
-        ; this routine peeks into the internal 32 bits multiplication result buffer of the
+        ; This routine peeks into the internal 32 bits multiplication result buffer of the
         ; 16*16 bits multiplication routine, to fetch the upper 16 bits of the last calculation.
+        ; Notes:
+        ;   - to avoid interference it's best to fetch and store this value immediately after the multiplication expression.
+        ;     For instance, simply printing a number may already result in new multiplication calls being performed.
+        ;   - not all multiplications in the source code result in an actual multiplication call:
+        ;     some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all!
         %asm {{
             lda  multiply_words.result+2
             ldy  multiply_words.result+3
diff --git a/compiler/res/prog8lib/virtual/math.p8 b/compiler/res/prog8lib/virtual/math.p8
index 31fa36efd..bd9088037 100644
--- a/compiler/res/prog8lib/virtual/math.p8
+++ b/compiler/res/prog8lib/virtual/math.p8
@@ -212,59 +212,72 @@ math {
         }
     }
 
-sub direction(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
-    ; From a pair of positive coordinates, calculate discrete direction between 0 and 23 into A.
-    ; This adjusts the atan() result  so that the direction N is centered on the angle=N instead of having it as a boundary
-    ubyte angle = atan2(x1, y1, x2, y2) - 256/48
-    return 23-lsb(mkword(angle,0) / 2730)
-}
-
-sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
-    ; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
-    ; shift the points into the positive quadrant
-    ubyte px1
-    ubyte py1
-    ubyte px2
-    ubyte py2
-    if x1<0 or x2<0 {
-        px1 = x1 as ubyte + 128
-        px2 = x2 as ubyte + 128
-    } else {
-        px1 = x1 as ubyte
-        px2 = x2 as ubyte
-    }
-    if y1<0 or y2<0 {
-        py1 = y1 as ubyte + 128
-        py2 = y2 as ubyte + 128
-    } else {
-        py1 = y1 as ubyte
-        py2 = y2 as ubyte
+    sub direction(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
+        ; From a pair of positive coordinates, calculate discrete direction between 0 and 23 into A.
+        ; This adjusts the atan2() result so that the direction N is centered on the angle=N instead of having it as a boundary
+        ubyte angle = atan2(x1, y1, x2, y2) - 256/48
+        return 23-lsb(mkword(angle,0) / 2730)
     }
 
-    return direction(px1, py1, px2, py2)
-}
+    sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
+        ; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
+        ; shift the points into the positive quadrant
+        ubyte px1
+        ubyte py1
+        ubyte px2
+        ubyte py2
+        if x1<0 or x2<0 {
+            px1 = x1 as ubyte + 128
+            px2 = x2 as ubyte + 128
+        } else {
+            px1 = x1 as ubyte
+            px2 = x2 as ubyte
+        }
+        if y1<0 or y2<0 {
+            py1 = y1 as ubyte + 128
+            py2 = y2 as ubyte + 128
+        } else {
+            py1 = y1 as ubyte
+            py2 = y2 as ubyte
+        }
 
-sub direction_qd(ubyte quadrant, ubyte xdelta, ubyte ydelta) -> ubyte {
-    ; From a pair of X/Y deltas (both >=0), and quadrant 0-3, calculate discrete direction between 0 and 23.
-    when quadrant {
-        3 -> return direction(0, 0, xdelta, ydelta)
-        2 -> return direction(xdelta, 0, 0, ydelta)
-        1 -> return direction(0, ydelta, xdelta, 0)
-        else -> return direction(xdelta, ydelta, 0, 0)
+        return direction(px1, py1, px2, py2)
+    }
+
+    sub direction_qd(ubyte quadrant, ubyte xdelta, ubyte ydelta) -> ubyte {
+        ; From a pair of X/Y deltas (both >=0), and quadrant 0-3, calculate discrete direction between 0 and 23.
+        when quadrant {
+            3 -> return direction(0, 0, xdelta, ydelta)
+            2 -> return direction(xdelta, 0, 0, ydelta)
+            1 -> return direction(0, ydelta, xdelta, 0)
+            else -> return direction(xdelta, ydelta, 0, 0)
+        }
+    }
+
+    sub atan2(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
+        ;; Calculate the angle, in a 256-degree circle, between two points into A.
+        ;; The points (x1, y1) and (x2, y2) have to use *unsigned coordinates only* from the positive quadrant in the Cartesian plane!
+        %ir {{
+            loadm.b r65532,math.atan2.x1
+            loadm.b r65533,math.atan2.y1
+            loadm.b r65534,math.atan2.x2
+            loadm.b r65535,math.atan2.y2
+            syscall 44 (r65532.b, r65533.b, r65534.b, r65535.b): r0.b
+            returnr.b r0
+        }}
+    }
+
+    sub mul16_last_upper() -> uword {
+        ; This routine peeks into the internal 32 bits multiplication result buffer of the
+        ; 16*16 bits multiplication routine, to fetch the upper 16 bits of the last calculation.
+        ; Notes:
+        ;   - to avoid interference it's best to fetch and store this value immediately after the multiplication expression.
+        ;     For instance, simply printing a number may already result in new multiplication calls being performed.
+        ;   - not all multiplications in the source code result in an actual multiplication call:
+        ;     some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all!
+        %ir {{
+            syscall 46 (): r0.w
+            returnr.w r0
+        }}
     }
 }
-
-sub atan2(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
-    ;; Calculate the angle, in a 256-degree circle, between two points into A.
-    ;; The points (x1, y1) and (x2, y2) have to use *unsigned coordinates only* from the positive quadrant in the carthesian plane!
-    %ir {{
-        loadm.b r65532,math.atan2.x1
-        loadm.b r65533,math.atan2.y1
-        loadm.b r65534,math.atan2.x2
-        loadm.b r65535,math.atan2.y2
-        syscall 44 (r65532.b, r65533.b, r65534.b, r65535.b): r0.b
-        returnr.b r0
-    }}
-}
-
-}
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index 0390dd2d6..07ef9e65f 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,7 +1,6 @@
 TODO
 ====
 
-- VM: make matn.mul16_last_upper()
 - clean up the active file channel assumptions in diskio (basically do chkin every time and not in f_open?)
 - return the file channel number from f_open and f_open_w instead of just true (so user can change it and set it back if they want instead of relying on the magic numbers 12 and 13)
   OR just add routines to set it back to 12/13 so no tracking has to occur by the user at all
diff --git a/examples/test.p8 b/examples/test.p8
index 39457d3fa..47b854c1f 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,4 +1,5 @@
 %import textio
+%import math
 ;%import verafx
 %zeropage basicsafe
 %option no_sysinit
@@ -12,9 +13,10 @@ main {
         uword value1=5678
         uword value2=9999
         uword result = value1*value2
+        uword upper16 = math.mul16_last_upper()
         txt.print_uw(result)
         txt.spc()
-        txt.print_uw(math.mul16_last_upper())
+        txt.print_uw(upper16)
         txt.nl()
 
 
diff --git a/virtualmachine/src/prog8/vm/SysCalls.kt b/virtualmachine/src/prog8/vm/SysCalls.kt
index b796170eb..3dcaec73c 100644
--- a/virtualmachine/src/prog8/vm/SysCalls.kt
+++ b/virtualmachine/src/prog8/vm/SysCalls.kt
@@ -53,6 +53,7 @@ SYSCALLS:
 43 = CLAMP_FLOAT
 44 = ATAN
 45 = STR_TO_FLOAT
+46 = MUL16_LAST_UPPER
 */
 
 enum class Syscall {
@@ -101,7 +102,8 @@ enum class Syscall {
     CLAMP_UWORD,
     CLAMP_FLOAT,
     ATAN,
-    STR_TO_FLOAT
+    STR_TO_FLOAT,
+    MUL16_LAST_UPPER
     ;
 
     companion object {
@@ -490,6 +492,9 @@ object SysCalls {
                 val result = floor(radians/2.0/PI*256.0)
                 returnValue(callspec.returns!!, result, vm)
             }
+            Syscall.MUL16_LAST_UPPER -> {
+                returnValue(callspec.returns!!, vm.mul16_last_upper, vm)
+            }
         }
     }
 }
diff --git a/virtualmachine/src/prog8/vm/VirtualMachine.kt b/virtualmachine/src/prog8/vm/VirtualMachine.kt
index e77005d92..6ebc9abf4 100644
--- a/virtualmachine/src/prog8/vm/VirtualMachine.kt
+++ b/virtualmachine/src/prog8/vm/VirtualMachine.kt
@@ -47,6 +47,7 @@ class VirtualMachine(irProgram: IRProgram) {
     var statusNegative = false
     internal var randomGenerator = Random(0xa55a7653)
     internal var randomGeneratorFloats = Random(0xc0d3dbad)
+    internal var mul16_last_upper = 0u
     val cx16virtualregsBaseAddress: Int
 
     init {
@@ -1447,10 +1448,14 @@ class VirtualMachine(irProgram: IRProgram) {
     private fun plusMinusMultAnyWord(operator: String, reg1: Int, reg2: Int) {
         val left = registers.getUW(reg1)
         val right = registers.getUW(reg2)
-        val result = when(operator) {
-            "+" -> left + right
-            "-" -> left - right
-            "*" -> left * right
+        val result: UInt
+        when(operator) {
+            "+" -> result = left + right
+            "-" -> result = left - right
+            "*" -> {
+                result = left.toUInt() * right
+                mul16_last_upper = result shr 16
+            }
             else -> throw IllegalArgumentException("operator word $operator")
         }
         registers.setUW(reg1, result.toUShort())
@@ -1458,10 +1463,14 @@ class VirtualMachine(irProgram: IRProgram) {
 
     private fun plusMinusMultConstWord(operator: String, reg1: Int, value: UShort) {
         val left = registers.getUW(reg1)
-        val result = when(operator) {
-            "+" -> left + value
-            "-" -> left - value
-            "*" -> left * value
+        val result: UInt
+        when(operator) {
+            "+" -> result = left + value
+            "-" -> result = left - value
+            "*" -> {
+                result = left.toUInt() * value
+                mul16_last_upper = result shr 16
+            }
             else -> throw IllegalArgumentException("operator word $operator")
         }
         registers.setUW(reg1, result.toUShort())
@@ -1470,10 +1479,14 @@ class VirtualMachine(irProgram: IRProgram) {
     private fun plusMinusMultAnyWordInplace(operator: String, reg1: Int, address: Int) {
         val memvalue = memory.getUW(address)
         val operand = registers.getUW(reg1)
-        val result = when(operator) {
-            "+" -> memvalue + operand
-            "-" -> memvalue - operand
-            "*" -> memvalue * operand
+        val result: UInt
+        when(operator) {
+            "+" -> result = memvalue + operand
+            "-" -> result = memvalue - operand
+            "*" -> {
+                result = memvalue.toUInt() * operand
+                mul16_last_upper = result shr 16
+            }
             else -> throw IllegalArgumentException("operator word $operator")
         }
         memory.setUW(address, result.toUShort())