added math.mul32(), verafx.muls now returns long

This commit is contained in:
Irmen de Jong
2025-10-04 21:40:12 +02:00
parent db1aa3f257
commit e63921009c
9 changed files with 84 additions and 113 deletions

View File

@@ -1122,7 +1122,7 @@ internal class AssignmentAsmGen(
asmgen.out(" pla")
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
}
asmgen.out(" jsr verafx.muls")
asmgen.out(" jsr verafx.muls16")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
} else {
@@ -1187,7 +1187,7 @@ internal class AssignmentAsmGen(
asmgen.out("""
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls""")
jsr verafx.muls16""")
} else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "prog8_math.multiply_words.multiplier")
asmgen.out(" jsr prog8_math.multiply_words")
@@ -2488,6 +2488,9 @@ $endLabel""")
assignExpressionToRegister(value, RegisterOrPair.A, valueDt.isSigned)
assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.A, valueDt.base)
}
valueDt.isLong -> {
TODO("assign typecasted long to $targetDt ${value.position}")
}
valueDt.isWord || valueDt.isPointer -> {
assignExpressionToRegister(value, RegisterOrPair.AY, valueDt.isSigned)
assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.AY, valueDt.base)
@@ -2983,7 +2986,15 @@ $endLabel""")
else -> throw AssemblyError("non-word regs")
}
}
BaseDataType.LONG -> TODO("assign typecasted to LONG")
BaseDataType.LONG -> {
when(regs) {
RegisterOrPair.AX -> asmgen.out(" sta $targetAsmVarName | stx $targetAsmVarName+1")
RegisterOrPair.AY -> asmgen.out(" sta $targetAsmVarName | sty $targetAsmVarName+1")
RegisterOrPair.XY -> asmgen.out(" stx $targetAsmVarName | sty $targetAsmVarName+1")
else -> throw AssemblyError("non-word regs")
}
asmgen.signExtendLongVariable(targetAsmVarName, BaseDataType.WORD)
}
BaseDataType.FLOAT -> {
if(regs!=RegisterOrPair.AY)
throw AssemblyError("only supports AY here")

View File

@@ -2323,7 +2323,7 @@ $shortcutLabel:""")
if(value in asmgen.optimizedWordMultiplications) {
asmgen.out(" lda $lsb | ldy $msb | jsr prog8_math.mul_word_$value | sta $lsb | sty $msb")
} else {
if(block?.options?.veraFxMuls==true)
if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware mul
asmgen.out("""
lda $lsb
@@ -2334,9 +2334,10 @@ $shortcutLabel:""")
ldy #>$value
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
jsr verafx.muls16
sta $lsb
sty $msb""")
}
else
asmgen.out("""
lda $lsb
@@ -2821,9 +2822,10 @@ $shortcutLabel:""")
ldy $name+1
sta cx16.r0
sty cx16.r0+1
jsr verafx.muls
jsr verafx.muls16
sta $name
sty $name+1""")
} else {
if(valueDt.isUnsignedByte) {
asmgen.out(" lda $otherName | sta prog8_math.multiply_words.multiplier")
@@ -2966,7 +2968,7 @@ $shortcutLabel:""")
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> {
if(block?.options?.veraFxMuls==true)
if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware muls
asmgen.out("""
lda $name
@@ -2977,9 +2979,10 @@ $shortcutLabel:""")
ldy $otherName+1
sta cx16.r1
sty cx16.r1+1
jsr verafx.muls
jsr verafx.muls16
sta $name
sty $name+1""")
}
else
asmgen.out("""
lda $otherName
@@ -3170,7 +3173,7 @@ $shortcutLabel:""")
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression, block: PtBlock?) {
require(dt.isWord)
fun multiplyVarByWordInAX() {
if(block?.options?.veraFxMuls==true)
if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware muls
asmgen.out("""
sta cx16.r1
@@ -3179,9 +3182,10 @@ $shortcutLabel:""")
ldx $name+1
sta cx16.r0
stx cx16.r0+1
jsr verafx.muls
jsr verafx.muls16
sta $name
sty $name+1""")
}
else
asmgen.out("""
sta prog8_math.multiply_words.multiplier

View File

@@ -686,6 +686,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.WORD, reg1=tr.resultReg, immediate = 0), null)
actualResultReg2 = loadStatusAsBooleanResult(Opcode.BSTNE, result)
}
valueDt.isLong -> TODO("typecast long ${cast.position}")
valueDt.isFloat -> {
actualResultReg2 = codeGen.registers.next(IRDataType.BYTE)
result += IRCodeChunk(null, null).also {

View File

@@ -116,26 +116,29 @@ verafx {
asmsub mult16(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY {
; Returns the 16 bits unsigned result of R0*R1 in AY.
; Returns the lower 16 bits unsigned result of R0*R1 in AY
; Note: only the lower 16 bits! (the upper 16 bits are not valid for unsigned word multiplications, only for signed)
; Verafx doesn't support unsigned values like this for full 32 bit result.
; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
%asm {{
lda cx16.r0
sta P8ZP_SCRATCH_W1
lda cx16.r0+1
sta P8ZP_SCRATCH_W1+1
jsr verafx.muls
ldx P8ZP_SCRATCH_W1
stx cx16.r0
ldx P8ZP_SCRATCH_W1+1
stx cx16.r0+1
jmp muls16
}}
}
asmsub muls16(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
; Signed multiplication of R0 by R1 that returns only the lower 16 bits of the result, in AY.
; Delegates the actual multiplication to muls and then picks up the lower word that muls left in R0.
; Note: clobbers R0, R1, and VRAM $1f9bc - $1f9bf (inclusive)
%asm {{
jsr muls            ; muls leaves the 32 bits result in R0:R1 (lower word, upper word)
lda cx16.r0L        ; A = low byte of the lower result word
ldy cx16.r0H        ; Y = high byte of the lower result word
rts
}}
}
asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 {
; Returns the 32 bits signed result in AY and R0 (lower word, upper word).
asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> long @R0R1_32 {
; Returns the 32 bits signed result in R0:R1 (lower word, upper word).
; Vera Fx multiplication support only works on signed values!
; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
%asm {{
@@ -171,12 +174,14 @@ verafx {
stz cx16.VERA_DATA0 ; multiply and write out result
lda #%00010001 ; $01 with Increment 1
sta cx16.VERA_ADDR_H ; so we can read out the result
lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in AY
lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in R0
ldy cx16.VERA_DATA0
ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in R0
stx cx16.r0s
ldx cx16.VERA_DATA0
stx cx16.r0s+1
sta cx16.r0L
sty cx16.r0H
lda cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1
ldy cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1
sta cx16.r1L
sty cx16.r1H
stz cx16.VERA_FX_CTRL ; Cache write disable
stz cx16.VERA_FX_MULT ; $9F2C reset multiply bit
stz cx16.VERA_CTRL ; reset DCSEL

View File

@@ -220,6 +220,12 @@ _sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
}}
}
sub mul32(uword a, uword b) -> long {
; Returns the 32 bits result of a*b as a long.
; The word multiplication a*b supplies the lower word; mul16_last_upper() is called right after it
; to obtain the upper word (presumably of that same multiplication - verify against its definition).
; Note: overwrites cx16.r2, which is used to hold the lower word in the meantime.
cx16.r2 = a*b
return mklong2(mul16_last_upper(), cx16.r2)     ; upper word is passed first, lower word second
}
sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
cx16.r0L = 3 ; quadrant

View File

@@ -304,6 +304,12 @@ math {
}}
}
sub mul32(uword a, uword b) -> long {
; Returns the 32 bits result of a*b as a long.
; The word multiplication a*b supplies the lower word; mul16_last_upper() is called right after it
; to obtain the upper word (presumably of that same multiplication - verify against its definition).
; Note: overwrites cx16.r2, which is used to hold the lower word in the meantime.
cx16.r2 = a*b
return mklong2(mul16_last_upper(), cx16.r2)     ; upper word is passed first, lower word second
}
sub diff(ubyte b1, ubyte b2) -> ubyte {
if b1>b2
return b1-b2

View File

@@ -1239,7 +1239,10 @@ Available for the Cx16 target. Routines that use the Vera FX logic to accelerate
But it depends on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage.
Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block
by calls to verafx, but be careful with it because it may interfere with other Vera operations or IRQs.
The full 32 bits result value is returned in two result values: lower word, upper word.
The full 32-bit result is returned as a single long value.
``muls16``
Like ``muls`` but only returns the lower word of the result, which is sometimes useful if you're just interested in word values.
``mult16``
VeraFX hardware multiplication of two unsigned words.

View File

@@ -3,9 +3,7 @@ TODO
LONG TYPE
---------
- scan through more library routines if there are opportunities to use a long param or returnvalue?
- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, and the use of R0:R1 when doing LONG calculations
- asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1
- document the new long type! This includes: mklong(a,b,c,d) and mklong2(w1,w2), print_l, print_ulhex (& conv.str_l), pokel, peekl, cbm.SETTIML/RDTIML, math.mul32, verafx.muls/muls16, the use of R0:R1 when doing LONG calculations, and the asmsub call convention @R0R1_32 to specify a 32 bits long combined register R0:R1
- how hard is it to also implement the other comparison operators (<,>,<=,>=) on longs?
- implement LONG testcases in testmemory

View File

@@ -1,95 +1,32 @@
%import textio
%import math
%import verafx
%zeropage basicsafe
main {
%option verafxmuls
sub start() {
long @shared lv1 = 12345678
long @shared lv2same = 12345678
long @shared lv2different = 999999
if lv1==0
txt.print("wrong1\n")
cx16.r5s = 22
cx16.r6s = -999
if lv1==0
txt.print("wrong2\n")
else
txt.print("ok2\n")
cx16.r0s = cx16.r5s * cx16.r6s
txt.print_w(cx16.r0s)
txt.nl()
if lv1!=0
txt.print("ok3\n")
if lv1!=0
txt.print("ok4\n")
else
txt.print("wrong4\n")
long lv = cx16.r5s * cx16.r6s
txt.print_l(lv)
txt.nl()
if lv1==999999
txt.print("wrong5\n")
if lv1==999999
txt.print("wrong6\n")
else
txt.print("ok6\n")
if lv1!=999999
txt.print("ok7\n")
if lv1!=999999
txt.print("ok8\n")
else
txt.print("wrong8\n")
if lv1==12345678
txt.print("ok9\n")
if lv1==12345678
txt.print("ok10\n")
else
txt.print("wrong10\n")
if lv1!=12345678
txt.print("wrong11\n")
if lv1!=12345678
txt.print("wrong12\n")
else
txt.print("ok12\n")
if lv1==lv2same
txt.print("ok13\n")
if lv1==lv2same
txt.print("ok14\n")
else
txt.print("wrong14\n")
if lv1!=lv2same
txt.print("wrong15\n")
if lv1!=lv2same
txt.print("wrong16\n")
else
txt.print("ok16\n")
if lv1==lv2different
txt.print("wrong17\n")
if lv1==lv2different
txt.print("wrong18\n")
else
txt.print("ok18\n")
if lv1!=lv2different
txt.print("ok19\n")
if lv1!=lv2different
txt.print("ok20\n")
else
txt.print("wrong20\n")
cx16.r5s = 5555
cx16.r6s = -9999
lv = cx16.r5s * cx16.r6s
txt.print_l(lv)
txt.nl()
lv = verafx.muls(cx16.r5s, cx16.r6s)
txt.print_l(lv)
txt.nl()
}
}