From 9de7698a5ca7d976e162820850cf706d95c0f193 Mon Sep 17 00:00:00 2001
From: Irmen de Jong <irmen@razorvine.net>
Date: Sun, 7 Apr 2024 22:31:56 +0200
Subject: [PATCH] verafx.mult() and muls() now return both words of the 32 bits
 result.

---
 compiler/res/prog8lib/cx16/verafx.p8 | 32 +++++++++++++---------------
 docs/source/todo.rst                 |  4 +---
 examples/test.p8                     | 31 +++++++++++++++++----------
 3 files changed, 36 insertions(+), 31 deletions(-)

diff --git a/compiler/res/prog8lib/cx16/verafx.p8 b/compiler/res/prog8lib/cx16/verafx.p8
index eda9ce09c..d6b5d88de 100644
--- a/compiler/res/prog8lib/cx16/verafx.p8
+++ b/compiler/res/prog8lib/cx16/verafx.p8
@@ -111,17 +111,15 @@ verafx {
 
     ; unsigned multiplication just passes the values as signed to muls
     ; if you do this yourself in your call to muls, it will save a few instructions.
-    sub mult(uword value1, uword value2) -> uword {
-        ; Returns the lower 16 bits of the 32 bits result,
-        ; the upper 16 bits are stored in cx16.r0 so you can access those separately.
-        ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues.
-        return muls(value1 as word, value2 as word) as uword
+    inline asmsub mult(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY, uword @R0 {
+        ; Returns lower word in AY, upper word in R0. NOTE: the multiply is signed (muls), so R0 is only a valid unsigned upper word when both values are < $8000.
+        %asm {{
+            jsr  verafx.muls
+        }}
     }
 
-    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
-        ; Returns the lower 16 bits of the 32 bits result in AY,
-        ; the upper 16 bits are stored in cx16.r0 so you can access those separately.
-        ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues.
+    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 {
+        ; Returns the 32 bits signed result in AY and R0  (lower word, upper word).
         %asm {{
             lda  #(2 << 1)
             sta  cx16.VERA_CTRL        ; $9F25
@@ -131,13 +129,13 @@ verafx {
             lda  #(6 << 1)
             sta  cx16.VERA_CTRL        ; $9F25
             lda  cx16.r0
-            ldy  cx16.r0+1
             sta  cx16.VERA_FX_CACHE_L  ; $9F29
-            sty  cx16.VERA_FX_CACHE_M  ; $9F2A
+            lda  cx16.r0+1
+            sta  cx16.VERA_FX_CACHE_M  ; $9F2A
             lda  cx16.r1
-            ldy  cx16.r1+1
             sta  cx16.VERA_FX_CACHE_H  ; $9F2B
-            sty  cx16.VERA_FX_CACHE_U  ; $9F2C
+            lda  cx16.r1+1
+            sta  cx16.VERA_FX_CACHE_U  ; $9F2C
             lda  cx16.VERA_FX_ACCUM_RESET   ; $9F29 (DCSEL=6)
 
             ; Set the ADDR0 pointer to $1f9bc and write our multiplication result there
@@ -155,12 +153,12 @@ verafx {
             stz  cx16.VERA_DATA0      ; multiply and write out result
             lda  #%00010001           ; $01 with Increment 1
             sta  cx16.VERA_ADDR_H     ; so we can read out the result
-            lda  cx16.VERA_DATA0
+            lda  cx16.VERA_DATA0      ; store the lower 16 bits of the result in AY
             ldy  cx16.VERA_DATA0
-            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in r0
-            stx  cx16.r0
+            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in R0
+            stx  cx16.r0s
             ldx  cx16.VERA_DATA0
-            stx  cx16.r0+1
+            stx  cx16.r0s+1
             stz  cx16.VERA_FX_CTRL    ; Cache write disable
             stz  cx16.VERA_CTRL       ; reset DCSEL
             rts
diff --git a/docs/source/todo.rst b/docs/source/todo.rst
index e471e9bf4..c9f91c54f 100644
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,11 +1,9 @@
 TODO
 ====
 
-fix routines such as mult in verafx to return both 16-bit words of the result.
-
 ubyte x,y   compiles to more code than  ubyte x + ubyte y
 
-can we make ubyte x,y = cbm.SCREEN() work?
+can we make ubyte x,y = cbm.SCREEN() work?  (sugar for  ubyte x,y //  x,y=cbm.SCREEN() ?)
 
 ...
 
diff --git a/examples/test.p8 b/examples/test.p8
index 4ff59cba6..25bed855b 100644
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,19 +1,28 @@
+%import textio
+%import verafx
 %zeropage basicsafe
 %option no_sysinit
 
 main {
-    romsub $2000 = func1() clobbers(X) -> ubyte @A, word @R0, byte @R1
-    romsub $3000 = func2() clobbers(X) -> ubyte @A, uword @R0, uword @R1
-    romsub $4000 = func3() clobbers(X) -> ubyte @R0
-
     sub start() {
-        bool flag
-        void cbm.GETIN()
-        flag, cx16.r1L = cbm.GETIN()
+        uword result, resulthi
+        result, resulthi = verafx.mult(9344, 6522)
+        txt.print_uwhex(resulthi, true)
+        txt.spc()
+        txt.print_uwhex(result, false)
+        txt.nl()
 
-        void, cx16.r0s, cx16.r1sL = func1()
-        void, cx16.r2, cx16.r1 = func2()
-        cx16.r0L = func3()
-        cx16.r0H = func3()
+        word sresult, sresulthi
+        sresult, sresulthi = verafx.muls(9344, -6522)
+        txt.print_w(sresulthi)
+        txt.spc()
+        txt.print_w(sresult)
+        txt.nl()
+
+        sresult, sresulthi = verafx.muls(144, -22)
+        txt.print_w(sresulthi)
+        txt.spc()
+        txt.print_w(sresult)
+        txt.nl()
     }
 }