verafx.mult/muls now return upper 16 bits of the result in r0

This commit is contained in:
Irmen de Jong 2023-11-06 21:55:58 +01:00
parent efe4df92dc
commit ee2888e744
4 changed files with 26 additions and 17 deletions

View File

@ -65,10 +65,16 @@ verafx {
; unsigned multiplication just passes the values as signed to muls
; if you do this yourself in your call to muls, it will save a few instructions.
sub mult(uword value1, uword value2) -> uword {
; Unsigned 16*16 bit multiplication, implemented by calling muls with both values reinterpreted as signed words.
; Returns the lower 16 bits of the 32-bit result.
; The upper 16 bits are left in cx16.r0 by muls, so you can read them separately right after the call.
; They are deliberately not part of the subroutine's signature, to avoid awkward use of multiple return values.
; NOTE(review): because the operands are passed on as signed words, cx16.r0 presumably holds the upper half
; of the *signed* product; that only equals the unsigned upper half when both values are below $8000 - confirm.
return muls(value1 as word, value2 as word) as uword
}
asmsub muls(word value1 @R0, word value2 @R1) -> word @AY {
asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
; Returns the lower 16 bits of the 32 bits result in AY,
; the upper 16 bits are stored in cx16.r0 so you can access those separately.
; It's not part of the subroutine's signature to avoid awkward use of multiple returnvalues.
%asm {{
lda #(2 << 1)
sta cx16.VERA_CTRL ; $9F25
@ -104,14 +110,13 @@ verafx {
sta cx16.VERA_ADDR_H ; so we can read out the result
lda cx16.VERA_DATA0
ldy cx16.VERA_DATA0
ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in r0
stx cx16.r0
ldx cx16.VERA_DATA0
stx cx16.r0+1
stz cx16.VERA_FX_CTRL ; Cache write disable
stz cx16.VERA_CTRL ; reset DCSEL
rts
; we skip the upper 16 bits of the result:
; lda cx16.VERA_DATA0
; sta $0402
; lda cx16.VERA_DATA0
; sta $0403
}}
}

View File

@ -332,7 +332,7 @@ private fun Prog8ANTLRParser.ClobberContext.toAst() : Set<CpuRegister> {
try {
return names.map { CpuRegister.valueOf(it) }.toSet()
} catch(ax: IllegalArgumentException) {
throw SyntaxError("invalid pu register", toPosition())
throw SyntaxError("invalid cpu register", toPosition())
}
}

View File

@ -415,6 +415,10 @@ but perhaps the provided ones can be of service too.
Returns the absolute difference, or distance, between the two word values.
(This routine is more efficient than doing a compare and a subtract separately, or using abs)
``mul16_last_upper () -> uword``
Fetches the upper 16 bits of the previous 16*16 bit multiplication.
To avoid corrupting the result, it is best performed immediately after the multiplication.
cx16logo
--------
@ -543,6 +547,9 @@ the emulators already support it).
Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block
by calls to verafx.muls/mult, but be careful with it because it may interfere with other Vera operations or IRQs.
Note: the lower 16 bits of the 32-bit result are returned as the subroutine's normal return value,
and the upper 16 bits are returned in ``cx16.r0`` so you can still access them separately.
``clear``
Very quickly clear a piece of vram to a given byte value (it writes 4 bytes at a time).
The routine is around 3 times faster than a regular unrolled loop to clear vram.

View File

@ -1,17 +1,14 @@
%zeropage basicsafe
%option no_sysinit
%import textio
%import verafx
main {
const ubyte FOO = 0
const ubyte BAR = 1
sub start() {
when FOO+BAR {
1-> txt.print("path 1")
2-> txt.print("path 2")
else-> txt.print("path 3")
sub start() {
uword lower16 = verafx.mult(11111,9988)
uword upper16 = cx16.r0
txt.print_uwhex(upper16, true) ; $069d5e9c = 110976668
txt.print_uwhex(lower16, false)
txt.nl()
}
txt.nl()
}
}