diff --git a/compiler/res/prog8lib/cx16/verafx.p8 b/compiler/res/prog8lib/cx16/verafx.p8 index 47636a6b5..00eb7cccb 100644 --- a/compiler/res/prog8lib/cx16/verafx.p8 +++ b/compiler/res/prog8lib/cx16/verafx.p8 @@ -111,15 +111,17 @@ verafx { ; unsigned multiplication just passes the values as signed to muls ; if you do this yourself in your call to muls, it will save a few instructions. - inline asmsub mult(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY, uword @R0 { - ; Returns the 32 bits unsigned result in AY and R0 (lower word, upper word). - %asm {{ - jsr verafx.muls - }} - } + ; TODO fix this: verafx.muls doesn't support unsigned values like this +; inline asmsub mult(uword value1 @R0, uword value2 @R1) clobbers(X) -> uword @AY, uword @R0 { +; ; Returns the 32 bits unsigned result in AY and R0 (lower word, upper word). +; %asm {{ +; jsr verafx.muls +; }} +; } asmsub muls(word value1 @R0, word value2 @R1) clobbers(X) -> word @AY, word @R0 { ; Returns the 32 bits signed result in AY and R0 (lower word, upper word). + ; Vera Fx multiplication support only works on signed values! %asm {{ lda #(2 << 1) sta cx16.VERA_CTRL ; $9F25 diff --git a/compiler/res/prog8lib/math.asm b/compiler/res/prog8lib/math.asm index 8ea07cc49..97ade9ea0 100644 --- a/compiler/res/prog8lib/math.asm +++ b/compiler/res/prog8lib/math.asm @@ -55,13 +55,14 @@ _multiplier = P8ZP_SCRATCH_REG multiply_words .proc - ; -- multiply two 16-bit words into a 32-bit result (signed and unsigned) + ; -- multiply two 16-bit words into a 32-bit result (UNSIGNED) ; input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number ; output: multiply_words.result, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY. + ; you can retrieve the upper 16 bits via math.mul16_last_upper() - ; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array. 
- ; this routine could be faster if we could stick that into zeropage, - ; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)... + ; NOTE FOR NEGATIVE VALUES: + ; The routine also works for NEGATIVE (signed) word values, but ONLY the lower 16 bits of the result are correct then! + ; Prog8 only uses those so that's not an issue, but math.mul16_last_upper() no longer gives the correct result here. ; mult62.a ; from: https://github.com/TobyLobster/multiply_test/blob/main/tests/mult62.a @@ -179,7 +180,7 @@ _inner_loop2 ldy result+1 rts -result .byte 0,0,0,0 +result .byte 0,0,0,0 ; routine could be faster if this were in Zeropage... .pend diff --git a/compiler/res/prog8lib/math.p8 b/compiler/res/prog8lib/math.p8 index b472d8897..5c33bae41 100644 --- a/compiler/res/prog8lib/math.p8 +++ b/compiler/res/prog8lib/math.p8 @@ -168,6 +168,9 @@ _sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0))) ; for instance, simply printing a number may already result in new multiplication calls being performed ; - not all multiplications in the source code result in an actual multiplication call: ; some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all! + ; - THE RESULT IS ONLY VALID IF THE MULTIPLICATION WAS DONE WITH UWORD ARGUMENTS (or two positive WORD arguments) + ; as soon as a negative word value (or 2) was used in the multiplication, these upper 16 bits are not valid!! + ; Suggestion (if you are on the Commander X16): use verafx.muls() to get a hardware accelerated 32 bit signed multiplication. 
%asm {{ lda multiply_words.result+2 ldy multiply_words.result+3 diff --git a/compiler/res/prog8lib/virtual/math.p8 b/compiler/res/prog8lib/virtual/math.p8 index 85aa296be..9f77c749f 100644 --- a/compiler/res/prog8lib/virtual/math.p8 +++ b/compiler/res/prog8lib/virtual/math.p8 @@ -293,6 +293,8 @@ math { ; for instance, simply printing a number may already result in new multiplication calls being performed ; - not all multiplications in the source code result in an actual multiplication call: ; some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all! + ; - THE RESULT IS ONLY VALID IF THE MULTIPLICATION WAS DONE WITH UWORD ARGUMENTS (or two positive WORD arguments) + ; as soon as a negative word value (or 2) was used in the multiplication, these upper 16 bits are not valid!! %ir {{ syscall 33 (): r0.w returnr.w r0 diff --git a/docs/source/libraries.rst b/docs/source/libraries.rst index ce4da7d5c..6af8a9545 100644 --- a/docs/source/libraries.rst +++ b/docs/source/libraries.rst @@ -787,6 +787,10 @@ but perhaps the provided ones can be of service too. It does not work for the verafx multiplication routines on the Commander X16! These have a different way to obtain the upper 16 bits of the result: just read cx16.r0. + **NOTE:** the result is only valid if the multiplication was done with uword arguments (or two positive word arguments). + As soon as a single negative word value (or both) was used in the multiplication, these upper 16 bits are not valid! + Suggestion (if you are on the Commander X16): use ``verafx.muls()`` to get a hardware accelerated 32 bit signed multiplication. + ``crc16 (uword data, uword length) -> uword`` Returns a CRC-16 (XMODEM) checksum over the given data buffer. Note: on the Commander X16, there is a CRC-16 routine in the kernal: cx16.memory_crc(). 
diff --git a/docs/source/todo.rst b/docs/source/todo.rst index ebce982d8..4de567291 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -14,7 +14,6 @@ Compiler: - Can we support signed % (remainder) somehow? - Don't add "random" rts to %asm blocks but instead give a warning about it? (but this breaks existing behavior that others already depend on... command line switch? block directive?) - IR: implement missing operators in AssignmentGen (array shifts etc) -- IR: CMPI+BSTEQ --> new BEQ reg,value,label instruction (like BGT etc) - instead of copy-pasting inline asmsubs, make them into a 64tass macro and use that instead. that will allow them to be reused from custom user written assembly code as well. - Multidimensional arrays and chained indexing, purely as syntactic sugar over regular arrays.