verafx.mult() and muls() now return both words of the 32-bit result.

2024-05-29 01:41:32 +00:00 · 2024-04-07 22:31:56 +02:00 · 2024-04-07 22:31:56 +02:00 · 9de7698a5c
commit 9de7698a5c
parent 112d2d6058
3 changed files with 36 additions and 31 deletions
--- a/compiler/res/prog8lib/cx16/verafx.p8
+++ b/compiler/res/prog8lib/cx16/verafx.p8
@@ -111,17 +111,15 @@ verafx {
    ; unsigned multiplication just passes the values as signed to muls
    ; if you do this yourself in your call to muls, it will save a few instructions.
-    sub mult(uword value1, uword value2) -> uword {
+    inline asmsub mult(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY, uword @R0 {
-        ; Returns the lower 16 bits of the 32 bits result,
+        ; Returns the 32 bits unsigned result in AY and R0  (lower word, upper word).
-        ; the upper 16 bits are stored in cx16.r0 so you can access those separately.
+        %asm {{
-        ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues.
+            jsr  verafx.muls
-        return muls(value1 as word, value2 as word) as uword
+        }}
    }
-    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
+    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 {
-        ; Returns the lower 16 bits of the 32 bits result in AY,
+        ; Returns the 32 bits signed result in AY and R0  (lower word, upper word).
        ; the upper 16 bits are stored in cx16.r0 so you can access those separately.
        ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues.
        %asm {{
            lda  #(2 << 1)
            sta  cx16.VERA_CTRL        ; $9F25
@@ -131,13 +129,13 @@ verafx {
            lda  #(6 << 1)
            sta  cx16.VERA_CTRL        ; $9F25
            lda  cx16.r0
            ldy  cx16.r0+1
            sta  cx16.VERA_FX_CACHE_L  ; $9F29
-            sty  cx16.VERA_FX_CACHE_M  ; $9F2A
+            lda  cx16.r0+1
            sta  cx16.VERA_FX_CACHE_M  ; $9F2A
            lda  cx16.r1
            ldy  cx16.r1+1
            sta  cx16.VERA_FX_CACHE_H  ; $9F2B
-            sty  cx16.VERA_FX_CACHE_U  ; $9F2C
+            lda  cx16.r1+1
            sta  cx16.VERA_FX_CACHE_U  ; $9F2C
            lda  cx16.VERA_FX_ACCUM_RESET   ; $9F29 (DCSEL=6)
            ; Set the ADDR0 pointer to $1f9bc and write our multiplication result there
@@ -155,12 +153,12 @@ verafx {
            stz  cx16.VERA_DATA0      ; multiply and write out result
            lda  #%00010001           ; $01 with Increment 1
            sta  cx16.VERA_ADDR_H     ; so we can read out the result
-            lda  cx16.VERA_DATA0
+            lda  cx16.VERA_DATA0      ; store the lower 16 bits of the result in AY
            ldy  cx16.VERA_DATA0
-            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in r0
+            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in R0
-            stx  cx16.r0
+            stx  cx16.r0s
            ldx  cx16.VERA_DATA0
-            stx  cx16.r0+1
+            stx  cx16.r0s+1
            stz  cx16.VERA_FX_CTRL    ; Cache write disable
            stz  cx16.VERA_CTRL       ; reset DCSEL
            rts
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -1,11 +1,9 @@
 TODO
 ====
 fix routines such as mult in verafx to return both 16-bit words of the result.
 ubyte x,y   compiles to more code than  ubyte x + ubyte y
-can we make ubyte x,y = cbm.SCREEN() work?
+can we make ubyte x,y = cbm.SCREEN() work?  (sugar for  ubyte x,y //  x,y=cbm.SCREEN() ?)
 ...
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,19 +1,28 @@
 %import textio
 %import verafx
 %zeropage basicsafe
 %option no_sysinit
 main {
    romsub $2000 = func1() clobbers(X) -> ubyte @A, word @R0, byte @R1
    romsub $3000 = func2() clobbers(X) -> ubyte @A, uword @R0, uword @R1
    romsub $4000 = func3() clobbers(X) -> ubyte @R0
    sub start() {
-        bool flag
+        uword result, resulthi
-        void cbm.GETIN()
+        result, resulthi = verafx.mult(9344, 6522)
-        flag, cx16.r1L = cbm.GETIN()
+        txt.print_uwhex(resulthi, true)
        txt.spc()
        txt.print_uwhex(result, false)
        txt.nl()
-        void, cx16.r0s, cx16.r1sL = func1()
+        word sresult, sresulthi
-        void, cx16.r2, cx16.r1 = func2()
+        sresult, sresulthi = verafx.muls(9344, -6522)
-        cx16.r0L = func3()
+        txt.print_w(sresulthi)
-        cx16.r0H = func3()
+        txt.spc()
        txt.print_w(sresult)
        txt.nl()
        sresult, sresulthi = verafx.muls(144, -22)
        txt.print_w(sresulthi)
        txt.spc()
        txt.print_w(sresult)
        txt.nl()
    }
 }