fix word multiplication to not clobber r0 and r1 anymore

This was causing corruption in certain programs such as the cx16/amiga example.
The problem was introduced in 9.4 with the new multiply_words routine.
This commit is contained in:
Irmen de Jong
2023-09-02 12:02:07 +02:00
parent 3b786c819d
commit e97303c226
5 changed files with 65 additions and 45 deletions

View File

@@ -543,7 +543,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
return true
}
in WordDatatypes -> {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
assignRegisterpairWord(target, RegisterOrPair.AY)
return true
@@ -567,7 +567,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
asmgen.out(" jsr math.mul_word_${value}")
}
else {
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
asmgen.out(" jsr math.multiply_words")
}
assignRegisterpairWord(target, RegisterOrPair.AY)

View File

@@ -1335,9 +1335,9 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
} else {
asmgen.out("""
lda $name
sta cx16.r0
sta math.multiply_words.multiplier
lda $name+1
sta cx16.r0+1
sta math.multiply_words.multiplier+1
lda #<$value
ldy #>$value
jsr math.multiply_words
@@ -1786,15 +1786,15 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
}
"*" -> {
if(valueDt==DataType.UBYTE) {
asmgen.out(" lda $otherName | sta cx16.r0")
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
if(asmgen.isTargetCpu(CpuType.CPU65c02))
asmgen.out(" stz cx16.r0+1")
asmgen.out(" stz math.multiply_words.multiplier+1")
else
asmgen.out(" lda #0 | sta cx16.r0+1")
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1")
} else {
asmgen.out(" lda $otherName")
asmgen.signExtendAYlsb(valueDt)
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1")
}
asmgen.out("""
lda $name
@@ -1930,31 +1930,16 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
"*" -> {
if(otherName=="cx16.r0")
asmgen.out("""
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
else if(name=="cx16.r0")
asmgen.out("""
lda $otherName
ldy $otherName+1
jsr math.multiply_words
sta $name
sty $name+1""")
else
asmgen.out("""
lda $otherName
ldy $otherName+1
sta cx16.r0
sty cx16.r0+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
asmgen.out("""
lda $otherName
ldy $otherName+1
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words
sta $name
sty $name+1""")
}
"/" -> {
if(dt==DataType.WORD) {
@@ -2135,8 +2120,8 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
fun multiplyVarByWordInAY() {
asmgen.out("""
sta cx16.r0
sty cx16.r0+1
sta math.multiply_words.multiplier
sty math.multiply_words.multiplier+1
lda $name
ldy $name+1
jsr math.multiply_words

View File

@@ -56,9 +56,12 @@ _multiplier = P8ZP_SCRATCH_REG
multiply_words .proc
; -- multiply two 16-bit words into a 32-bit result (signed and unsigned)
; input: A/Y = first 16-bit number, cx16.R0 = second 16-bit number
; output: multiply_words.result == cx16.R0:R1, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY.
; TODO: should not use R0 and R1 at all !!! result needs 4 consecutive bytes, so it can't be in zeropage at all...
; input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number
; output: multiply_words.result = 4-byte/32-bit product in LSB order (low-to-high); the low 16 bits are also returned in AY.
; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array.
; this routine could be faster if we could stick that into zeropage,
; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...
; mult62.a
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
@@ -73,9 +76,8 @@ multiply_words .proc
; Average cycles:
; 93 bytes
_multiplicand = P8ZP_SCRATCH_W1 ; 2 bytes
_multiplier = cx16.r0 ; 2 bytes
result = cx16.r0 ; 4 bytes (note: shares memory with multiplier) so is r0 and ALSO r1.
_multiplicand = P8ZP_SCRATCH_W2 ; 2 bytes
multiplier = result
; 16 bit x 16 bit unsigned multiply, 32 bit result
;
@@ -175,6 +177,9 @@ _inner_loop2
lda result
ldy result+1
rts
result .byte 0,0,0,0
.pend

View File

@@ -1,10 +1,8 @@
TODO
====
- fix on c64 target: examples/cube3d-float (broken since 9.3 with the evalstack removal); oddly enough, it works on the x16 target.
A simpler, more detailed reproduction of this problem is in floatproblem64.p8
- fix: amiga example with noopt draws wrong lines, caused by "2x faster word multiplication routine" because it trashes r0 and r1 now
multiply_words in math.asm needs fixing.
- fix: test all other things with noopt once again! (examples/c64 are all ok)
- fix: search for TODO("swap operand order")
- optimize: search for TODO optimize: don't use scratch var
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?

32
floatproblem64.p8 Normal file
View File

@@ -0,0 +1,32 @@
%import textio
%import floats
%zeropage dontuse
; Reproducer for a float expression that gives a wrong result on the C64 target.
; Three variations of "a*two + b*two" are evaluated; each result is printed,
; followed by a line stating the value that was expected.
main {
sub start() {
float value1 = -0.8
float value2 = 0.3
float two = 2.0
; case 1: negative operand first: -0.8*2.0 + 0.3*2.0 should be -1
float result = value1*two + value2*two ; TODO FIX: invalid result on c64, ok when the *two is removed or expression is split (it's not caused by pushFAC1/popFAC1)
floats.print_f(result)
txt.nl()
txt.print("-1 was expected\n\n") ; on C64: -1.1 is printed :(
; case 2: same values as case 1, but with the two products in swapped order
result = value2*two + value1*two ; swapped operands around, now it's suddenly fine on C64...
floats.print_f(result)
txt.nl()
txt.print("-1 was expected\n\n") ; on C64: correct value is printed
; case 3: same expression shape as case 1, but value1 is now positive: 0.8*2.0 + 0.3*2.0 should be 2.2
value1 = 0.8
value2 = 0.3
result = value1*two + value2*two
floats.print_f(result)
txt.nl()
txt.print("2.2 was expected\n\n") ; on C64: correct value is printed
; NOTE(review): empty repeat block without a count; presumably loops forever so the output stays visible, confirm
repeat {
}
}
}