From e63921009cd3c7fa92b70d707190bc6ab517fec7 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sat, 4 Oct 2025 21:40:12 +0200 Subject: [PATCH] added math.mul32(), verafx.muls now returns long --- .../cpu6502/assignment/AssignmentAsmGen.kt | 17 ++- .../assignment/AugmentableAssignmentAsmGen.kt | 18 ++-- .../codegen/intermediate/ExpressionGen.kt | 1 + compiler/res/prog8lib/cx16/verafx.p8 | 39 ++++--- compiler/res/prog8lib/math.p8 | 6 ++ compiler/res/prog8lib/virtual/math.p8 | 6 ++ docs/source/libraries.rst | 5 +- docs/source/todo.rst | 4 +- examples/test.p8 | 101 ++++-------------- 9 files changed, 84 insertions(+), 113 deletions(-) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index d2365a506..0cc318840 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -1122,7 +1122,7 @@ internal class AssignmentAsmGen( asmgen.out(" pla") asmgen.out(" sta cx16.r0 | sty cx16.r0+1") } - asmgen.out(" jsr verafx.muls") + asmgen.out(" jsr verafx.muls16") assignRegisterpairWord(target, RegisterOrPair.AY) return true } else { @@ -1187,7 +1187,7 @@ internal class AssignmentAsmGen( asmgen.out(""" sta cx16.r0 sty cx16.r0+1 - jsr verafx.muls""") + jsr verafx.muls16""") } else { asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "prog8_math.multiply_words.multiplier") asmgen.out(" jsr prog8_math.multiply_words") @@ -2488,6 +2488,9 @@ $endLabel""") assignExpressionToRegister(value, RegisterOrPair.A, valueDt.isSigned) assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.A, valueDt.base) } + valueDt.isLong -> { + TODO("assign typecasted long to $targetDt ${value.position}") + } valueDt.isWord || valueDt.isPointer -> { assignExpressionToRegister(value, RegisterOrPair.AY, valueDt.isSigned) assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.AY, valueDt.base) @@ -2983,7 +2986,15 @@ $endLabel""") else -> throw AssemblyError("non-word regs") } } - BaseDataType.LONG -> TODO("assign typecasted to LONG") + BaseDataType.LONG -> { + when(regs) { + RegisterOrPair.AX -> asmgen.out(" sta $targetAsmVarName | stx $targetAsmVarName+1") + RegisterOrPair.AY -> asmgen.out(" sta $targetAsmVarName | sty $targetAsmVarName+1") + RegisterOrPair.XY -> asmgen.out(" stx $targetAsmVarName | sty $targetAsmVarName+1") + else -> throw AssemblyError("non-word regs") + } + asmgen.signExtendLongVariable(targetAsmVarName, BaseDataType.WORD) + } BaseDataType.FLOAT -> { if(regs!=RegisterOrPair.AY) throw AssemblyError("only supports AY here") diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt index 14c9a4ebc..fa6c4bec4 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt @@ -2323,7 +2323,7 @@ $shortcutLabel:""") if(value in asmgen.optimizedWordMultiplications) { asmgen.out(" lda $lsb | ldy $msb | jsr prog8_math.mul_word_$value | sta $lsb | sty $msb") } else { - if(block?.options?.veraFxMuls==true) + if(block?.options?.veraFxMuls==true) { // cx16 verafx hardware mul asmgen.out(""" lda $lsb @@ -2334,9 +2334,10 @@ $shortcutLabel:""") ldy #>$value sta cx16.r1 sty cx16.r1+1 - jsr verafx.muls + jsr verafx.muls16 sta $lsb sty $msb""") + } else asmgen.out(""" lda $lsb @@ -2821,9 +2822,10 @@ $shortcutLabel:""") ldy $name+1 sta cx16.r0 sty cx16.r0+1 - jsr verafx.muls + jsr verafx.muls16 sta $name sty $name+1""") + } else { if(valueDt.isUnsignedByte) { asmgen.out(" lda $otherName | sta prog8_math.multiply_words.multiplier") @@ -2966,7 +2968,7 @@ $shortcutLabel:""") "+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1") "-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1") "*" -> { - if(block?.options?.veraFxMuls==true) + if(block?.options?.veraFxMuls==true) { // cx16 verafx hardware muls asmgen.out(""" lda $name @@ -2977,9 +2979,10 @@ $shortcutLabel:""") ldy $otherName+1 sta cx16.r1 sty cx16.r1+1 - jsr verafx.muls + jsr verafx.muls16 sta $name sty $name+1""") + } else asmgen.out(""" lda $otherName @@ -3170,7 +3173,7 @@ $shortcutLabel:""") private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression, block: PtBlock?) { require(dt.isWord) fun multiplyVarByWordInAX() { - if(block?.options?.veraFxMuls==true) + if(block?.options?.veraFxMuls==true) { // cx16 verafx hardware muls asmgen.out(""" sta cx16.r1 @@ -3179,9 +3182,10 @@ $shortcutLabel:""") ldx $name+1 sta cx16.r0 stx cx16.r0+1 - jsr verafx.muls + jsr verafx.muls16 sta $name sty $name+1""") + } else asmgen.out(""" sta prog8_math.multiply_words.multiplier diff --git a/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt b/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt index b54924e64..a7ea013a5 100644 --- a/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt +++ b/codeGenIntermediate/src/prog8/codegen/intermediate/ExpressionGen.kt @@ -686,6 +686,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) { addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.WORD, reg1=tr.resultReg, immediate = 0), null) actualResultReg2 = loadStatusAsBooleanResult(Opcode.BSTNE, result) } + valueDt.isLong -> TODO("typecast long ${cast.position}") valueDt.isFloat -> { actualResultReg2 = codeGen.registers.next(IRDataType.BYTE) result += IRCodeChunk(null, null).also { diff --git a/compiler/res/prog8lib/cx16/verafx.p8 b/compiler/res/prog8lib/cx16/verafx.p8 index 4ce8b2408..286a3114d 100644 --- a/compiler/res/prog8lib/cx16/verafx.p8 +++ b/compiler/res/prog8lib/cx16/verafx.p8 @@ -116,26 +116,29 @@ verafx { asmsub mult16(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY { - ; Returns the 16 bits unsigned result of R0*R1 in AY. + ; Returns the lower 16 bits unsigned result of R0*R1 in AY ; Note: only the lower 16 bits! (the upper 16 bits are not valid for unsigned word multiplications, only for signed) ; Verafx doesn't support unsigned values like this for full 32 bit result. ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive) %asm {{ - lda cx16.r0 - sta P8ZP_SCRATCH_W1 - lda cx16.r0+1 - sta P8ZP_SCRATCH_W1+1 - jsr verafx.muls - ldx P8ZP_SCRATCH_W1 - stx cx16.r0 - ldx P8ZP_SCRATCH_W1+1 - stx cx16.r0+1 + jmp muls16 + }} + } + + asmsub muls16(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY { + ; Returns just the lower 16 bits signed result of the multiplication in cx16.AY. + ; Note: clobbers R0, R1, and VRAM $1f9bc - $1f9bf (inclusive) + %asm {{ + jsr muls + lda cx16.r0L + ldy cx16.r0H rts }} } - asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 { - ; Returns the 32 bits signed result in AY and R0 (lower word, upper word). + + asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> long @R0R1_32 { + ; Returns the 32 bits signed result in R0:R1 (lower word, upper word). ; Vera Fx multiplication support only works on signed values! ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive) %asm {{ @@ -171,12 +174,14 @@ verafx { stz cx16.VERA_DATA0 ; multiply and write out result lda #%00010001 ; $01 with Increment 1 sta cx16.VERA_ADDR_H ; so we can read out the result - lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in AY + lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in R0 ldy cx16.VERA_DATA0 - ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in R0 - stx cx16.r0s - ldx cx16.VERA_DATA0 - stx cx16.r0s+1 + sta cx16.r0L + sty cx16.r0H + lda cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1 + ldy cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1 + sta cx16.r1L + sty cx16.r1H stz cx16.VERA_FX_CTRL ; Cache write disable stz cx16.VERA_FX_MULT ; $9F2C reset multiply bit stz cx16.VERA_CTRL ; reset DCSEL diff --git a/compiler/res/prog8lib/math.p8 b/compiler/res/prog8lib/math.p8 index c1d36e873..f5fce6a84 100644 --- a/compiler/res/prog8lib/math.p8 +++ b/compiler/res/prog8lib/math.p8 @@ -220,6 +220,12 @@ _sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0))) }} } + sub mul32(uword a, uword b) -> long { + ; return 32 bits result of a*b + cx16.r2 = a*b + return mklong2(mul16_last_upper(), cx16.r2) + } + sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte { ; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A. cx16.r0L = 3 ; quadrant diff --git a/compiler/res/prog8lib/virtual/math.p8 b/compiler/res/prog8lib/virtual/math.p8 index 260ace7f2..993e17793 100644 --- a/compiler/res/prog8lib/virtual/math.p8 +++ b/compiler/res/prog8lib/virtual/math.p8 @@ -304,6 +304,12 @@ math { }} } + sub mul32(uword a, uword b) -> long { + ; return 32 bits result of a*b + cx16.r2 = a*b + return mklong2(mul16_last_upper(), cx16.r2) + } + sub diff(ubyte b1, ubyte b2) -> ubyte { if b1>b2 return b1-b2 diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index 987509fa8..2b6601a85 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -1239,7 +1239,10 @@ Available for the Cx16 target. Routines that use the Vera FX logic to accelerate But it depends on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage. Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block by calls to verafx, but be careful with it because it may interfere with other Vera operations or IRQs. - The full 32 bits result value is returned in two result values: lower word, upper word. + The full 32 bits result value is returned as a long. + +``muls16`` + Like ``muls`` but only returns the lower word of the result, which is sometimes useful if you're just interested in word values. ``mult16`` VeraFX hardware multiplication of two unsigned words. diff --git a/docs/source/todo.rst b/docs/source/todo.rst index 5252f6ebf..42599fe5a 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -3,9 +3,7 @@ TODO LONG TYPE --------- -- scan through more library routines if there are opportunities to use a long param or returnvalue? -- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, and the use of R0:R1 when doing LONG calculations -- asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1 +- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, math.mul32, verafx.muls/muls16, and the use of R0:R1 when doing LONG calculations, asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1 - how hard is it to also implement the other comparison operators (<,>,<=,>=) on longs? - implement LONG testcases in testmemory diff --git a/examples/test.p8 b/examples/test.p8 index 0de96095b..d7cc195d6 100644 --- a/examples/test.p8 +++ b/examples/test.p8 @@ -1,95 +1,32 @@ %import textio %import math +%import verafx %zeropage basicsafe main { + %option verafxmuls + sub start() { - long @shared lv1 = 12345678 - long @shared lv2same = 12345678 - long @shared lv2different = 999999 - if lv1==0 - txt.print("wrong1\n") + cx16.r5s = 22 + cx16.r6s = -999 - if lv1==0 - txt.print("wrong2\n") - else - txt.print("ok2\n") + cx16.r0s = cx16.r5s * cx16.r6s + txt.print_w(cx16.r0s) + txt.nl() - if lv1!=0 - txt.print("ok3\n") - - if lv1!=0 - txt.print("ok4\n") - else - txt.print("wrong4\n") + long lv = cx16.r5s * cx16.r6s + txt.print_l(lv) + txt.nl() - if lv1==999999 - txt.print("wrong5\n") - - if lv1==999999 - txt.print("wrong6\n") - else - txt.print("ok6\n") - - if lv1!=999999 - txt.print("ok7\n") - - if lv1!=999999 - txt.print("ok8\n") - else - txt.print("wrong8\n") - - if lv1==12345678 - txt.print("ok9\n") - - if lv1==12345678 - txt.print("ok10\n") - else - txt.print("wrong10\n") - - if lv1!=12345678 - txt.print("wrong11\n") - - if lv1!=12345678 - txt.print("wrong12\n") - else - txt.print("ok12\n") - - - - if lv1==lv2same - txt.print("ok13\n") - - if lv1==lv2same - txt.print("ok14\n") - else - txt.print("wrong14\n") - - if lv1!=lv2same - txt.print("wrong15\n") - - if lv1!=lv2same - txt.print("wrong16\n") - else - txt.print("ok16\n") - - - if lv1==lv2different - txt.print("wrong17\n") - - if lv1==lv2different - txt.print("wrong18\n") - else - txt.print("ok18\n") - - if lv1!=lv2different - txt.print("ok19\n") - - if lv1!=lv2different - txt.print("ok20\n") - else - txt.print("wrong20\n") + cx16.r5s = 5555 + cx16.r6s = -9999 + lv = cx16.r5s * cx16.r6s + txt.print_l(lv) + txt.nl() + lv = verafx.muls(cx16.r5s, cx16.r6s) + txt.print_l(lv) + txt.nl() } }