From 9de7698a5ca7d976e162820850cf706d95c0f193 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sun, 7 Apr 2024 22:31:56 +0200 Subject: [PATCH] verafx.mult() and muls() now return both words of the 32 bits result. --- compiler/res/prog8lib/cx16/verafx.p8 | 32 +++++++++++++--------------- docs/source/todo.rst | 4 +--- examples/test.p8 | 31 +++++++++++++++++---------- 3 files changed, 36 insertions(+), 31 deletions(-) diff --git a/compiler/res/prog8lib/cx16/verafx.p8 b/compiler/res/prog8lib/cx16/verafx.p8 index eda9ce09c..d6b5d88de 100644 --- a/compiler/res/prog8lib/cx16/verafx.p8 +++ b/compiler/res/prog8lib/cx16/verafx.p8 @@ -111,17 +111,15 @@ verafx { ; unsigned multiplication just passes the values as signed to muls ; if you do this yourself in your call to muls, it will save a few instructions. - sub mult(uword value1, uword value2) -> uword { - ; Returns the lower 16 bits of the 32 bits result, - ; the upper 16 bits are stored in cx16.r0 so you can access those separately. - ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues. - return muls(value1 as word, value2 as word) as uword + inline asmsub mult(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY, uword @R0 { + ; Returns the 32 bits unsigned result in AY and R0 (lower word, upper word). + %asm {{ + jsr verafx.muls + }} } - asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY { - ; Returns the lower 16 bits of the 32 bits result in AY, - ; the upper 16 bits are stored in cx16.r0 so you can access those separately. - ; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues. + asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 { + ; Returns the 32 bits signed result in AY and R0 (lower word, upper word). %asm {{ lda #(2 << 1) sta cx16.VERA_CTRL ; $9F25 @@ -131,13 +129,13 @@ verafx { lda #(6 << 1) sta cx16.VERA_CTRL ; $9F25 lda cx16.r0 - ldy cx16.r0+1 sta cx16.VERA_FX_CACHE_L ; $9F29 - sty cx16.VERA_FX_CACHE_M ; $9F2A + lda cx16.r0+1 + sta cx16.VERA_FX_CACHE_M ; $9F2A lda cx16.r1 - ldy cx16.r1+1 sta cx16.VERA_FX_CACHE_H ; $9F2B - sty cx16.VERA_FX_CACHE_U ; $9F2C + lda cx16.r1+1 + sta cx16.VERA_FX_CACHE_U ; $9F2C lda cx16.VERA_FX_ACCUM_RESET ; $9F29 (DCSEL=6) ; Set the ADDR0 pointer to $1f9bc and write our multiplication result there @@ -155,12 +153,12 @@ verafx { stz cx16.VERA_DATA0 ; multiply and write out result lda #%00010001 ; $01 with Increment 1 sta cx16.VERA_ADDR_H ; so we can read out the result - lda cx16.VERA_DATA0 + lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in AY ldy cx16.VERA_DATA0 - ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in r0 - stx cx16.r0 + ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in R0 + stx cx16.r0s ldx cx16.VERA_DATA0 - stx cx16.r0+1 + stx cx16.r0s+1 stz cx16.VERA_FX_CTRL ; Cache write disable stz cx16.VERA_CTRL ; reset DCSEL rts diff --git a/docs/source/todo.rst b/docs/source/todo.rst index e471e9bf4..c9f91c54f 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,11 +1,9 @@ TODO ==== -fix routines such as mult in verafx to return both 16-bit words of the result. - ubyte x,y compiles to more code than ubyte x + ubyte y -can we make ubyte x,y = cbm.SCREEN() work? +can we make ubyte x,y = cbm.SCREEN() work? (sugar for ubyte x,y // x,y=cbm.SCREEN() ?) ... diff --git a/examples/test.p8 b/examples/test.p8 index 4ff59cba6..25bed855b 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,19 +1,28 @@ +%import textio +%import verafx %zeropage basicsafe %option no_sysinit main { - romsub $2000 = func1() clobbers(X) -> ubyte @A, word @R0, byte @R1 - romsub $3000 = func2() clobbers(X) -> ubyte @A, uword @R0, uword @R1 - romsub $4000 = func3() clobbers(X) -> ubyte @R0 - sub start() { - bool flag - void cbm.GETIN() - flag, cx16.r1L = cbm.GETIN() + uword result, resulthi + result, resulthi = verafx.mult(9344, 6522) + txt.print_uwhex(resulthi, true) + txt.spc() + txt.print_uwhex(result, false) + txt.nl() - void, cx16.r0s, cx16.r1sL = func1() - void, cx16.r2, cx16.r1 = func2() - cx16.r0L = func3() - cx16.r0H = func3() + word sresult, sresulthi + sresult, sresulthi = verafx.muls(9344, -6522) + txt.print_w(sresulthi) + txt.spc() + txt.print_w(sresult) + txt.nl() + + sresult, sresulthi = verafx.muls(144, -22) + txt.print_w(sresulthi) + txt.spc() + txt.print_w(sresult) + txt.nl() } }