added math.mul32(), verafx.muls now returns long

This commit is contained in:
Irmen de Jong
2025-10-04 21:40:12 +02:00
parent db1aa3f257
commit e63921009c
9 changed files with 84 additions and 113 deletions

View File

@@ -1122,7 +1122,7 @@ internal class AssignmentAsmGen(
asmgen.out(" pla") asmgen.out(" pla")
asmgen.out(" sta cx16.r0 | sty cx16.r0+1") asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
} }
asmgen.out(" jsr verafx.muls") asmgen.out(" jsr verafx.muls16")
assignRegisterpairWord(target, RegisterOrPair.AY) assignRegisterpairWord(target, RegisterOrPair.AY)
return true return true
} else { } else {
@@ -1187,7 +1187,7 @@ internal class AssignmentAsmGen(
asmgen.out(""" asmgen.out("""
sta cx16.r0 sta cx16.r0
sty cx16.r0+1 sty cx16.r0+1
jsr verafx.muls""") jsr verafx.muls16""")
} else { } else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "prog8_math.multiply_words.multiplier") asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "prog8_math.multiply_words.multiplier")
asmgen.out(" jsr prog8_math.multiply_words") asmgen.out(" jsr prog8_math.multiply_words")
@@ -2488,6 +2488,9 @@ $endLabel""")
assignExpressionToRegister(value, RegisterOrPair.A, valueDt.isSigned) assignExpressionToRegister(value, RegisterOrPair.A, valueDt.isSigned)
assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.A, valueDt.base) assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.A, valueDt.base)
} }
valueDt.isLong -> {
TODO("assign typecasted long to $targetDt ${value.position}")
}
valueDt.isWord || valueDt.isPointer -> { valueDt.isWord || valueDt.isPointer -> {
assignExpressionToRegister(value, RegisterOrPair.AY, valueDt.isSigned) assignExpressionToRegister(value, RegisterOrPair.AY, valueDt.isSigned)
assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.AY, valueDt.base) assignTypeCastedRegisters(target.asmVarname, targetDt.base, RegisterOrPair.AY, valueDt.base)
@@ -2983,7 +2986,15 @@ $endLabel""")
else -> throw AssemblyError("non-word regs") else -> throw AssemblyError("non-word regs")
} }
} }
BaseDataType.LONG -> TODO("assign typecasted to LONG") BaseDataType.LONG -> {
when(regs) {
RegisterOrPair.AX -> asmgen.out(" sta $targetAsmVarName | stx $targetAsmVarName+1")
RegisterOrPair.AY -> asmgen.out(" sta $targetAsmVarName | sty $targetAsmVarName+1")
RegisterOrPair.XY -> asmgen.out(" stx $targetAsmVarName | sty $targetAsmVarName+1")
else -> throw AssemblyError("non-word regs")
}
asmgen.signExtendLongVariable(targetAsmVarName, BaseDataType.WORD)
}
BaseDataType.FLOAT -> { BaseDataType.FLOAT -> {
if(regs!=RegisterOrPair.AY) if(regs!=RegisterOrPair.AY)
throw AssemblyError("only supports AY here") throw AssemblyError("only supports AY here")

View File

@@ -2323,7 +2323,7 @@ $shortcutLabel:""")
if(value in asmgen.optimizedWordMultiplications) { if(value in asmgen.optimizedWordMultiplications) {
asmgen.out(" lda $lsb | ldy $msb | jsr prog8_math.mul_word_$value | sta $lsb | sty $msb") asmgen.out(" lda $lsb | ldy $msb | jsr prog8_math.mul_word_$value | sta $lsb | sty $msb")
} else { } else {
if(block?.options?.veraFxMuls==true) if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware mul // cx16 verafx hardware mul
asmgen.out(""" asmgen.out("""
lda $lsb lda $lsb
@@ -2334,9 +2334,10 @@ $shortcutLabel:""")
ldy #>$value ldy #>$value
sta cx16.r1 sta cx16.r1
sty cx16.r1+1 sty cx16.r1+1
jsr verafx.muls jsr verafx.muls16
sta $lsb sta $lsb
sty $msb""") sty $msb""")
}
else else
asmgen.out(""" asmgen.out("""
lda $lsb lda $lsb
@@ -2821,9 +2822,10 @@ $shortcutLabel:""")
ldy $name+1 ldy $name+1
sta cx16.r0 sta cx16.r0
sty cx16.r0+1 sty cx16.r0+1
jsr verafx.muls jsr verafx.muls16
sta $name sta $name
sty $name+1""") sty $name+1""")
} else { } else {
if(valueDt.isUnsignedByte) { if(valueDt.isUnsignedByte) {
asmgen.out(" lda $otherName | sta prog8_math.multiply_words.multiplier") asmgen.out(" lda $otherName | sta prog8_math.multiply_words.multiplier")
@@ -2966,7 +2968,7 @@ $shortcutLabel:""")
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1") "+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1") "-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> { "*" -> {
if(block?.options?.veraFxMuls==true) if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware muls // cx16 verafx hardware muls
asmgen.out(""" asmgen.out("""
lda $name lda $name
@@ -2977,9 +2979,10 @@ $shortcutLabel:""")
ldy $otherName+1 ldy $otherName+1
sta cx16.r1 sta cx16.r1
sty cx16.r1+1 sty cx16.r1+1
jsr verafx.muls jsr verafx.muls16
sta $name sta $name
sty $name+1""") sty $name+1""")
}
else else
asmgen.out(""" asmgen.out("""
lda $otherName lda $otherName
@@ -3170,7 +3173,7 @@ $shortcutLabel:""")
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression, block: PtBlock?) { private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression, block: PtBlock?) {
require(dt.isWord) require(dt.isWord)
fun multiplyVarByWordInAX() { fun multiplyVarByWordInAX() {
if(block?.options?.veraFxMuls==true) if(block?.options?.veraFxMuls==true) {
// cx16 verafx hardware muls // cx16 verafx hardware muls
asmgen.out(""" asmgen.out("""
sta cx16.r1 sta cx16.r1
@@ -3179,9 +3182,10 @@ $shortcutLabel:""")
ldx $name+1 ldx $name+1
sta cx16.r0 sta cx16.r0
stx cx16.r0+1 stx cx16.r0+1
jsr verafx.muls jsr verafx.muls16
sta $name sta $name
sty $name+1""") sty $name+1""")
}
else else
asmgen.out(""" asmgen.out("""
sta prog8_math.multiply_words.multiplier sta prog8_math.multiply_words.multiplier

View File

@@ -686,6 +686,7 @@ internal class ExpressionGen(private val codeGen: IRCodeGen) {
addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.WORD, reg1=tr.resultReg, immediate = 0), null) addInstr(result, IRInstruction(Opcode.CMPI, IRDataType.WORD, reg1=tr.resultReg, immediate = 0), null)
actualResultReg2 = loadStatusAsBooleanResult(Opcode.BSTNE, result) actualResultReg2 = loadStatusAsBooleanResult(Opcode.BSTNE, result)
} }
valueDt.isLong -> TODO("typecast long ${cast.position}")
valueDt.isFloat -> { valueDt.isFloat -> {
actualResultReg2 = codeGen.registers.next(IRDataType.BYTE) actualResultReg2 = codeGen.registers.next(IRDataType.BYTE)
result += IRCodeChunk(null, null).also { result += IRCodeChunk(null, null).also {

View File

@@ -116,26 +116,29 @@ verafx {
asmsub mult16(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY { asmsub mult16(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY {
; Returns the 16 bits unsigned result of R0*R1 in AY. ; Returns the lower 16 bits unsigned result of R0*R1 in AY
; Note: only the lower 16 bits! (the upper 16 bits are not valid for unsigned word multiplications, only for signed) ; Note: only the lower 16 bits! (the upper 16 bits are not valid for unsigned word multiplications, only for signed)
; Verafx doesn't support unsigned values like this for full 32 bit result. ; Verafx doesn't support unsigned values like this for full 32 bit result.
; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive) ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
%asm {{ %asm {{
lda cx16.r0 jmp muls16
sta P8ZP_SCRATCH_W1 }}
lda cx16.r0+1 }
sta P8ZP_SCRATCH_W1+1
jsr verafx.muls asmsub muls16(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY {
ldx P8ZP_SCRATCH_W1 ; Returns just the lower 16 bits signed result of the multiplication in cx16.AY.
stx cx16.r0 ; Note: clobbers R0, R1, and VRAM $1f9bc - $1f9bf (inclusive)
ldx P8ZP_SCRATCH_W1+1 %asm {{
stx cx16.r0+1 jsr muls
lda cx16.r0L
ldy cx16.r0H
rts rts
}} }}
} }
asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 {
; Returns the 32 bits signed result in AY and R0 (lower word, upper word). asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> long @R0R1_32 {
; Returns the 32 bits signed result in R0:R1 (lower word, upper word).
; Vera Fx multiplication support only works on signed values! ; Vera Fx multiplication support only works on signed values!
; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive) ; Note: clobbers VRAM $1f9bc - $1f9bf (inclusive)
%asm {{ %asm {{
@@ -171,12 +174,14 @@ verafx {
stz cx16.VERA_DATA0 ; multiply and write out result stz cx16.VERA_DATA0 ; multiply and write out result
lda #%00010001 ; $01 with Increment 1 lda #%00010001 ; $01 with Increment 1
sta cx16.VERA_ADDR_H ; so we can read out the result sta cx16.VERA_ADDR_H ; so we can read out the result
lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in AY lda cx16.VERA_DATA0 ; store the lower 16 bits of the result in R0
ldy cx16.VERA_DATA0 ldy cx16.VERA_DATA0
ldx cx16.VERA_DATA0 ; store the upper 16 bits of the result in R0 sta cx16.r0L
stx cx16.r0s sty cx16.r0H
ldx cx16.VERA_DATA0 lda cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1
stx cx16.r0s+1 ldy cx16.VERA_DATA0 ; store the upper 16 bits of the result in R1
sta cx16.r1L
sty cx16.r1H
stz cx16.VERA_FX_CTRL ; Cache write disable stz cx16.VERA_FX_CTRL ; Cache write disable
stz cx16.VERA_FX_MULT ; $9F2C reset multiply bit stz cx16.VERA_FX_MULT ; $9F2C reset multiply bit
stz cx16.VERA_CTRL ; reset DCSEL stz cx16.VERA_CTRL ; reset DCSEL

View File

@@ -220,6 +220,12 @@ _sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
}} }}
} }
sub mul32(uword a, uword b) -> long {
; Return the 32 bits result of a*b as a long.
; Side effect: overwrites cx16.r2 (it is used to hold the 16 bits result of the word multiplication).
; NOTE(review): the operands are unsigned words but the return type is long; if long is a signed type,
;               products of $80000000 and up would come out negative - confirm this is intended.
cx16.r2 = a*b
; mul16_last_upper() is called only after a*b above has been evaluated; keep this order.
; NOTE(review): presumably mul16_last_upper() yields the upper 16 bits of that multiplication,
;               and mklong2 takes (upper word, lower word) - confirm.
return mklong2(mul16_last_upper(), cx16.r2)
}
sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte { sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A. ; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
cx16.r0L = 3 ; quadrant cx16.r0L = 3 ; quadrant

View File

@@ -304,6 +304,12 @@ math {
}} }}
} }
sub mul32(uword a, uword b) -> long {
; Return the 32 bits result of a*b as a long.
; Side effect: overwrites cx16.r2 (it is used to hold the 16 bits result of the word multiplication).
; NOTE(review): the operands are unsigned words but the return type is long; if long is a signed type,
;               products of $80000000 and up would come out negative - confirm this is intended.
cx16.r2 = a*b
; mul16_last_upper() is called only after a*b above has been evaluated; keep this order.
; NOTE(review): presumably mul16_last_upper() yields the upper 16 bits of that multiplication,
;               and mklong2 takes (upper word, lower word) - confirm.
return mklong2(mul16_last_upper(), cx16.r2)
}
sub diff(ubyte b1, ubyte b2) -> ubyte { sub diff(ubyte b1, ubyte b2) -> ubyte {
if b1>b2 if b1>b2
return b1-b2 return b1-b2

View File

@@ -1239,7 +1239,10 @@ Available for the Cx16 target. Routines that use the Vera FX logic to accelerate
But it depends on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage. But it depends on some Vera manipulation and 4 bytes in vram just below the PSG registers for storage.
Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block Note: there is a block level %option "verafxmuls" that automatically replaces all word multiplications in that block
by calls to verafx, but be careful with it because it may interfere with other Vera operations or IRQs. by calls to verafx, but be careful with it because it may interfere with other Vera operations or IRQs.
The full 32 bits result value is returned in two result values: lower word, upper word. The full 32 bits result value is returned as a long.
``muls16``
Like ``muls`` but returns only the lower word (16 bits) of the signed result, which is useful when you are only interested in word values. Note that it clobbers R0 and R1.
``mult16`` ``mult16``
VeraFX hardware multiplication of two unsigned words. VeraFX hardware multiplication of two unsigned words.

View File

@@ -3,9 +3,7 @@ TODO
LONG TYPE LONG TYPE
--------- ---------
- scan through more library routines if there are opportunities to use a long param or returnvalue? - document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, math.mul32, verafx.muls/muls16, and the use of R0:R1 when doing LONG calculations, asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1
- document the new long type! and mklong(a,b,c,d) and mklong2(w1,w2) , print_l , print_ulhex (& conv.str_l) and pokel, peekl, cbm.SETTIML/RDTIML, and the use of R0:R1 when doing LONG calculations
- asmsub call convention: @R0R1_32 to specify a 32 bits long combined register R0:R1
- how hard is it to also implement the other comparison operators (<,>,<=,>=) on longs? - how hard is it to also implement the other comparison operators (<,>,<=,>=) on longs?
- implement LONG testcases in testmemory - implement LONG testcases in testmemory

View File

@@ -1,95 +1,32 @@
%import textio %import textio
%import math %import math
%import verafx
%zeropage basicsafe %zeropage basicsafe
main { main {
%option verafxmuls
sub start() { sub start() {
long @shared lv1 = 12345678
long @shared lv2same = 12345678
long @shared lv2different = 999999
if lv1==0 cx16.r5s = 22
txt.print("wrong1\n") cx16.r6s = -999
if lv1==0 cx16.r0s = cx16.r5s * cx16.r6s
txt.print("wrong2\n") txt.print_w(cx16.r0s)
else txt.nl()
txt.print("ok2\n")
if lv1!=0 long lv = cx16.r5s * cx16.r6s
txt.print("ok3\n") txt.print_l(lv)
txt.nl()
if lv1!=0
txt.print("ok4\n")
else
txt.print("wrong4\n")
if lv1==999999 cx16.r5s = 5555
txt.print("wrong5\n") cx16.r6s = -9999
lv = cx16.r5s * cx16.r6s
if lv1==999999 txt.print_l(lv)
txt.print("wrong6\n") txt.nl()
else lv = verafx.muls(cx16.r5s, cx16.r6s)
txt.print("ok6\n") txt.print_l(lv)
txt.nl()
if lv1!=999999
txt.print("ok7\n")
if lv1!=999999
txt.print("ok8\n")
else
txt.print("wrong8\n")
if lv1==12345678
txt.print("ok9\n")
if lv1==12345678
txt.print("ok10\n")
else
txt.print("wrong10\n")
if lv1!=12345678
txt.print("wrong11\n")
if lv1!=12345678
txt.print("wrong12\n")
else
txt.print("ok12\n")
if lv1==lv2same
txt.print("ok13\n")
if lv1==lv2same
txt.print("ok14\n")
else
txt.print("wrong14\n")
if lv1!=lv2same
txt.print("wrong15\n")
if lv1!=lv2same
txt.print("wrong16\n")
else
txt.print("ok16\n")
if lv1==lv2different
txt.print("wrong17\n")
if lv1==lv2different
txt.print("wrong18\n")
else
txt.print("ok18\n")
if lv1!=lv2different
txt.print("ok19\n")
if lv1!=lv2different
txt.print("ok20\n")
else
txt.print("wrong20\n")
} }
} }