verafx.mult/muls now return upper 16 bits of the result in r0

2025-02-16 07:31:48 +00:00 · 2023-11-06 21:55:58 +01:00 · 2023-11-06 21:55:58 +01:00 · ee2888e744
commit ee2888e744
parent efe4df92dc
4 changed files with 26 additions and 17 deletions
--- a/compiler/res/prog8lib/cx16/verafx.p8
+++ b/compiler/res/prog8lib/cx16/verafx.p8
@@ -65,10 +65,16 @@ verafx {
    ; unsigned multiplication just passes the values as signed to muls
    ; if you do this yourself in your call to muls, it will save a few instructions.
    sub mult(uword value1, uword value2) -> uword {
+        ; Returns the lower 16 bits of the 32-bit result;
+        ; the upper 16 bits (of the signed product, because the operands are passed to muls as signed words) are stored in cx16.r0 so you can access them separately.
+        ; They are not part of the subroutine's signature, to avoid awkward use of multiple return values.
        return muls(value1 as word, value2 as word) as uword
    }

-    asmsub muls(word value1 @R0, word value2 @R1) -> word @AY {
+    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
+        ; Returns the lower 16 bits of the 32-bit result in AY;
+        ; the upper 16 bits are stored in cx16.r0 so you can access them separately.
+        ; They are not part of the subroutine's signature, to avoid awkward use of multiple return values.
        %asm {{
            lda  #(2 << 1)
            sta  cx16.VERA_CTRL        ; $9F25
@@ -104,14 +110,13 @@ verafx {
            sta  cx16.VERA_ADDR_H     ; so we can read out the result
            lda  cx16.VERA_DATA0
            ldy  cx16.VERA_DATA0
+            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in r0
+            stx  cx16.r0
+            ldx  cx16.VERA_DATA0
+            stx  cx16.r0+1
            stz  cx16.VERA_FX_CTRL    ; Cache write disable
            stz  cx16.VERA_CTRL       ; reset DCSEL
            rts
-; we skip the upper 16 bits of the result:
-;            lda  cx16.VERA_DATA0
-;            sta  $0402
-;            lda  cx16.VERA_DATA0
-;            sta  $0403
        }}
    }

--- a/compilerAst/src/prog8/ast/antlr/Antlr2Kotlin.kt
+++ b/compilerAst/src/prog8/ast/antlr/Antlr2Kotlin.kt
@@ -332,7 +332,7 @@ private fun Prog8ANTLRParser.ClobberContext.toAst() : Set<CpuRegister> {
    try {
        return names.map { CpuRegister.valueOf(it) }.toSet()
    } catch(ax: IllegalArgumentException) {
-        throw SyntaxError("invalid pu register", toPosition())
+        throw SyntaxError("invalid cpu register", toPosition())
    }
 }

--- a/docs/source/libraries.rst
+++ b/docs/source/libraries.rst
@@ -415,6 +415,10 @@ but perhaps the provided ones can be of service too.
    Returns the absolute difference, or distance, between the two word values.
    (This routine is more efficient than doing a compare and a subtract separately, or using abs)

+``mul16_last_upper () -> uword``
+    Fetches the upper 16 bits of the previous 16*16 bit multiplication.
+    To avoid corrupting the result, it is best performed immediately after the multiplication.
+

 cx16logo
 --------
@@ -543,6 +547,9 @@ the emulators already support it).
    Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block
    by calls to verafx.muls/mult, but be careful with it because it may interfere with other Vera operations or IRQs.

+    Note: the lower 16 bits of the 32-bit result are returned as the subroutine's normal return value,
+    but the upper 16 bits are returned in ``cx16.r0`` so you can still access them separately.
+
 ``clear``
    Very quickly clear a piece of vram to a given byte value (it writes 4 bytes at a time).
    The routine is around 3 times faster as a regular unrolled loop to clear vram.
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,17 +1,14 @@
 %zeropage basicsafe
 %option no_sysinit
 %import textio
+%import verafx

 main {
-  const ubyte FOO = 0
-  const ubyte BAR = 1
-
-  sub start() {
-    when FOO+BAR {
-        1-> txt.print("path 1")
-        2-> txt.print("path 2")
-        else-> txt.print("path 3")
+    sub start() {
+        uword lower16 = verafx.mult(11111,9988)
+        uword upper16 = cx16.r0
+        txt.print_uwhex(upper16, true)   ; $069d5e9c  = 110976668
+        txt.print_uwhex(lower16, false)
+        txt.nl()
    }
-    txt.nl()
-  }
 }