added math.mul32(), verafx.muls now returns long

2025-11-01 06:16:15 +00:00 · 2025-10-04 21:40:12 +02:00
parent db1aa3f257
commit e63921009c
9 changed files with 84 additions and 113 deletions
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt
@@ -1122,7 +1122,7 @@ internal class AssignmentAsmGen(
                            asmgen.out("  pla")
                            asmgen.out("  sta  cx16.r0 |  sty  cx16.r0+1")
                        }
-                        asmgen.out("  jsr  verafx.muls")
+                        asmgen.out("  jsr  verafx.muls16")
                        assignRegisterpairWord(target, RegisterOrPair.AY)
                        return true
                    } else {
@@ -1187,7 +1187,7 @@ internal class AssignmentAsmGen(
                            asmgen.out("""
                                sta  cx16.r0
                                sty  cx16.r0+1
-                                jsr  verafx.muls""")
+                                jsr  verafx.muls16""")
                        } else {
                            asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "prog8_math.multiply_words.multiplier")
                            asmgen.out("  jsr  prog8_math.multiply_words")
@@ -2488,6 +2488,9 @@ $endLabel""")
                    assignExpressionToRegister(value, RegisterOrPair.A, valueDt.isSigned)
                    assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.A, valueDt.base)
                }
+                valueDt.isLong -> {
+                    TODO("assign typecasted long to $targetDt ${value.position}")
+                }
                valueDt.isWord || valueDt.isPointer -> {
                    assignExpressionToRegister(value, RegisterOrPair.AY, valueDt.isSigned)
                    assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.AY, valueDt.base)
@@ -2983,7 +2986,15 @@ $endLabel""")
                            else -> throw AssemblyError("non-word regs")
                        }
                    }
-                    BaseDataType.LONG -> TODO("assign typecasted to LONG")
+                    BaseDataType.LONG -> {
+                        when(regs) {
+                            RegisterOrPair.AX -> asmgen.out("  sta  $targetAsmVarName |  stx  $targetAsmVarName+1")
+                            RegisterOrPair.AY -> asmgen.out("  sta  $targetAsmVarName |  sty  $targetAsmVarName+1")
+                            RegisterOrPair.XY -> asmgen.out("  stx  $targetAsmVarName |  sty  $targetAsmVarName+1")
+                            else -> throw AssemblyError("non-word regs")
+                        }
+                        asmgen.signExtendLongVariable(targetAsmVarName, BaseDataType.WORD)
+                    }
                    BaseDataType.FLOAT -> {
                        if(regs!=RegisterOrPair.AY)
                            throw AssemblyError("only supports AY here")
--- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
+++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt
@@ -2323,7 +2323,7 @@ $shortcutLabel:""")
                if(value in asmgen.optimizedWordMultiplications) {
                    asmgen.out("  lda  $lsb |  ldy  $msb |  jsr  prog8_math.mul_word_$value |  sta  $lsb |  sty  $msb")
                } else {
-                    if(block?.options?.veraFxMuls==true)
+                    if(block?.options?.veraFxMuls==true) {
                        // cx16 verafx hardware mul
                        asmgen.out("""
                            lda  $lsb
@@ -2334,9 +2334,10 @@ $shortcutLabel:""")
                            ldy  #>$value
                            sta  cx16.r1
                            sty  cx16.r1+1
-                            jsr  verafx.muls
+                            jsr  verafx.muls16
                            sta  $lsb
                            sty  $msb""")
+                    }
                    else
                        asmgen.out("""
                            lda  $lsb
@@ -2821,9 +2822,10 @@ $shortcutLabel:""")
                                ldy  $name+1
                                sta  cx16.r0
                                sty  cx16.r0+1
-                                jsr  verafx.muls
+                                jsr  verafx.muls16
                                sta  $name
                                sty  $name+1""")
+
                        } else {
                            if(valueDt.isUnsignedByte) {
                                asmgen.out("  lda  $otherName |  sta  prog8_math.multiply_words.multiplier")
@@ -2966,7 +2968,7 @@ $shortcutLabel:""")
                    "+" -> asmgen.out("  lda  $name |  clc |  adc  $otherName |  sta  $name |  lda  $name+1 |  adc  $otherName+1 |  sta  $name+1")
                    "-" -> asmgen.out("  lda  $name |  sec |  sbc  $otherName |  sta  $name |  lda  $name+1 |  sbc  $otherName+1 |  sta  $name+1")
                    "*" -> {
-                        if(block?.options?.veraFxMuls==true)
+                        if(block?.options?.veraFxMuls==true) {
                            // cx16 verafx hardware muls
                            asmgen.out("""
                                lda  $name
@@ -2977,9 +2979,10 @@ $shortcutLabel:""")
                                ldy  $otherName+1
                                sta  cx16.r1
                                sty  cx16.r1+1
-                                jsr  verafx.muls
+                                jsr  verafx.muls16
                                sta  $name
                                sty  $name+1""")
+                        }
                        else
                            asmgen.out("""
                                lda  $otherName
@@ -3170,7 +3173,7 @@ $shortcutLabel:""")
    private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression, block: PtBlock?) {
        require(dt.isWord)
        fun multiplyVarByWordInAX() {
-            if(block?.options?.veraFxMuls==true)
+            if(block?.options?.veraFxMuls==true) {
                // cx16 verafx hardware muls
                asmgen.out("""
                    sta  cx16.r1
@@ -3179,9 +3182,10 @@ $shortcutLabel:""")
                    ldx  $name+1
                    sta  cx16.r0
                    stx  cx16.r0+1
-                    jsr  verafx.muls
+                    jsr  verafx.muls16
                    sta  $name
                    sty  $name+1""")
+            }
            else
                asmgen.out("""
                    sta  prog8_math.multiply_words.multiplier
--- a/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt
+++ b/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt
@@ -686,6 +686,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
                        addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.WORD, reg1=tr.resultReg, immediate = 0), null)
                        actualResultReg2 = loadStatusAsBooleanResult(Opcode.BSTNE, result)
                    }
+                    valueDt.isLong -> TODO("typecast long ${cast.position}")
                    valueDt.isFloat -> {
                        actualResultReg2 = codeGen.registers.next(IRDataType.BYTE)
                        result += IRCodeChunk(null, null).also {
--- a/compiler/res/prog8lib/cx16/verafx.p8
+++ b/compiler/res/prog8lib/cx16/verafx.p8
@@ -116,26 +116,29 @@ verafx {


    asmsub mult16(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY {
-        ; Returns the 16 bits unsigned result of R0*R1 in AY.
+        ; Returns the lower 16 bits unsigned result of R0*R1 in AY. Note: clobbers R0 and R1 (tail-calls muls16).
        ; Note: only the lower 16 bits!   (the upper 16 bits are not valid for unsigned word multiplications, only for signed)
        ; Verafx doesn't support unsigned values like this for full 32 bit result.
        ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
        %asm {{
-            lda  cx16.r0
-            sta  P8ZP_SCRATCH_W1
-            lda  cx16.r0+1
-            sta  P8ZP_SCRATCH_W1+1
-            jsr  verafx.muls
-            ldx  P8ZP_SCRATCH_W1
-            stx  cx16.r0
-            ldx  P8ZP_SCRATCH_W1+1
-            stx  cx16.r0+1
+            jmp  muls16
+        }}
+    }
+
+    asmsub muls16(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
+        ; Returns just the lower 16 bits of the signed multiplication result in AY.
+        ; Note: clobbers R0, R1, and VRAM $1f9bc - $1f9bf (inclusive)
+        %asm {{
+            jsr  muls
+            lda  cx16.r0L
+            ldy  cx16.r0H
            rts
        }}
    }

-    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 {
-        ; Returns the 32 bits signed result in AY and R0  (lower word, upper word).
+
+    asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> long @R0R1_32 {
+        ; Returns the 32 bits signed result in R0:R1  (lower word, upper word).
        ; Vera Fx multiplication support only works on signed values!
        ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
        %asm {{
@@ -171,12 +174,14 @@ verafx {
            stz  cx16.VERA_DATA0      ; multiply and write out result
            lda  #%00010001           ; $01 with Increment 1
            sta  cx16.VERA_ADDR_H     ; so we can read out the result
-            lda  cx16.VERA_DATA0      ; store the lower 16 bits of the result in AY
+            lda  cx16.VERA_DATA0      ; store the lower 16 bits of the result in R0
            ldy  cx16.VERA_DATA0
-            ldx  cx16.VERA_DATA0      ; store the upper 16 bits of the result in R0
-            stx  cx16.r0s
-            ldx  cx16.VERA_DATA0
-            stx  cx16.r0s+1
+            sta  cx16.r0L
+            sty  cx16.r0H
+            lda  cx16.VERA_DATA0      ; store the upper 16 bits of the result in R1
+            ldy  cx16.VERA_DATA0      ; (second byte of the upper word, goes into r1H)
+            sta  cx16.r1L
+            sty  cx16.r1H
            stz  cx16.VERA_FX_CTRL    ; Cache write disable
            stz  cx16.VERA_FX_MULT    ; $9F2C  reset multiply bit
            stz  cx16.VERA_CTRL       ; reset DCSEL
--- a/compiler/res/prog8lib/math.p8
+++ b/compiler/res/prog8lib/math.p8
@@ -220,6 +220,12 @@ _sinecosR8	.char  trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
        }}
    }

+    sub mul32(uword a, uword b) -> long {
+        ; Returns the full 32 bits result of a*b as a long. Note: overwrites cx16.r2 (used as scratch).
+        cx16.r2 = a*b
+        return mklong2(mul16_last_upper(), cx16.r2)
+    }
+
 sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
    ; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
    cx16.r0L = 3        ; quadrant
--- a/compiler/res/prog8lib/virtual/math.p8
+++ b/compiler/res/prog8lib/virtual/math.p8
@@ -304,6 +304,12 @@ math {
        }}
    }

+    sub mul32(uword a, uword b) -> long {
+        ; Returns the full 32 bits result of a*b as a long. Note: overwrites cx16.r2 (used as scratch).
+        cx16.r2 = a*b
+        return mklong2(mul16_last_upper(), cx16.r2)
+    }
+
    sub diff(ubyte b1, ubyte b2) -> ubyte {
        if b1>b2
            return b1-b2
--- a/docs/source/libraries.rst
+++ b/docs/source/libraries.rst
@@ -1239,7 +1239,10 @@ Available for the Cx16 target. Routines that use the Vera FX logic to accelerate
    But it depends on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage.
    Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block
    by calls to verafx, but be careful with it because it may interfere with other Vera operations or IRQs.
-    The full 32 bits result value is returned in two result values: lower word, upper word.
+    The full 32 bits result value is returned as a long.
+
+``muls16``
+    Like ``muls`` but only returns the lower word of the result, which is sometimes useful if you're just interested in word values.

 ``mult16``
    VeraFX hardware multiplication of two unsigned words.
--- a/docs/source/todo.rst
+++ b/docs/source/todo.rst
@@ -3,9 +3,7 @@ TODO

 LONG TYPE
 ---------
- scan through more library routines if there are opportunities to use a long param or returnvalue?
- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML,  and the use of R0:R1 when doing LONG calculations
- asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1
+- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, math.mul32, verafx.muls/muls16, and the use of R0:R1 when doing LONG calculations, asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1
 - how hard is it to also implement the other comparison operators (<,>,<=,>=) on longs?
 - implement LONG testcases in testmemory

--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,95 +1,32 @@
 %import textio
 %import math
+%import verafx
 %zeropage basicsafe

 main {
+    %option verafxmuls
+
    sub start() {
-        long @shared lv1 = 12345678
-        long @shared lv2same = 12345678
-        long @shared lv2different = 999999

-        if lv1==0
-            txt.print("wrong1\n")
+        cx16.r5s = 22
+        cx16.r6s = -999

-        if lv1==0
-            txt.print("wrong2\n")
-        else
-            txt.print("ok2\n")
+        cx16.r0s = cx16.r5s * cx16.r6s
+        txt.print_w(cx16.r0s)
+        txt.nl()

-        if lv1!=0
-            txt.print("ok3\n")
-
-        if lv1!=0
-            txt.print("ok4\n")
-        else
-            txt.print("wrong4\n")
+        long lv = cx16.r5s * cx16.r6s
+        txt.print_l(lv)
+        txt.nl()


-        if lv1==999999
-            txt.print("wrong5\n")
-
-        if lv1==999999
-            txt.print("wrong6\n")
-        else
-            txt.print("ok6\n")
-
-        if lv1!=999999
-            txt.print("ok7\n")
-
-        if lv1!=999999
-            txt.print("ok8\n")
-        else
-            txt.print("wrong8\n")
-
-        if lv1==12345678
-            txt.print("ok9\n")
-
-        if lv1==12345678
-            txt.print("ok10\n")
-        else
-            txt.print("wrong10\n")
-
-        if lv1!=12345678
-            txt.print("wrong11\n")
-
-        if lv1!=12345678
-            txt.print("wrong12\n")
-        else
-            txt.print("ok12\n")
-
-
-
-        if lv1==lv2same
-            txt.print("ok13\n")
-
-        if lv1==lv2same
-            txt.print("ok14\n")
-        else
-            txt.print("wrong14\n")
-
-        if lv1!=lv2same
-            txt.print("wrong15\n")
-
-        if lv1!=lv2same
-            txt.print("wrong16\n")
-        else
-            txt.print("ok16\n")
-
-
-        if lv1==lv2different
-            txt.print("wrong17\n")
-
-        if lv1==lv2different
-            txt.print("wrong18\n")
-        else
-            txt.print("ok18\n")
-
-        if lv1!=lv2different
-            txt.print("ok19\n")
-
-        if lv1!=lv2different
-            txt.print("ok20\n")
-        else
-            txt.print("wrong20\n")
+        cx16.r5s = 5555
+        cx16.r6s = -9999
+        lv = cx16.r5s * cx16.r6s
+        txt.print_l(lv)
+        txt.nl()
+        lv = verafx.muls(cx16.r5s, cx16.r6s)
+        txt.print_l(lv)
+        txt.nl()
    }
 }