From e97303c226bafba0a3e6e490ff9fc1cef65f0da3 Mon Sep 17 00:00:00 2001 From: Irmen de Jong Date: Sat, 2 Sep 2023 12:02:07 +0200 Subject: [PATCH] fix word multiplication to not clobber r0 and r1 anymore This was causing corruption in certain programs such as the cx16/amiga example. The problem was introduced in 9.4 with the new multiply_words routine --- .../cpu6502/assignment/AssignmentAsmGen.kt | 4 +- .../assignment/AugmentableAssignmentAsmGen.kt | 51 +++++++------------ compiler/res/prog8lib/math.asm | 17 ++++--- docs/source/todo.rst | 6 +-- floatproblem64.p8 | 32 ++++++++++++ 5 files changed, 65 insertions(+), 45 deletions(-) create mode 100644 floatproblem64.p8 diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt index 7897110b9..052f01fb5 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AssignmentAsmGen.kt @@ -543,7 +543,7 @@ internal class AssignmentAsmGen(private val program: PtProgram, return true } in WordDatatypes -> { - asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0") + asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") asmgen.out(" jsr math.multiply_words") assignRegisterpairWord(target, RegisterOrPair.AY) return true @@ -567,7 +567,7 @@ internal class AssignmentAsmGen(private val program: PtProgram, asmgen.out(" jsr math.mul_word_${value}") } else { - asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0") + asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier") asmgen.out(" jsr math.multiply_words") } assignRegisterpairWord(target, RegisterOrPair.AY) diff --git a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt index 0e9812e64..3e43feec4 100644 --- a/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt +++ b/codeGenCpu6502/src/prog8/codegen/cpu6502/assignment/AugmentableAssignmentAsmGen.kt @@ -1335,9 +1335,9 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, } else { asmgen.out(""" lda $name - sta cx16.r0 + sta math.multiply_words.multiplier lda $name+1 - sta cx16.r0+1 + sta math.multiply_words.multiplier+1 lda #<$value ldy #>$value jsr math.multiply_words @@ -1786,15 +1786,15 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, } "*" -> { if(valueDt==DataType.UBYTE) { - asmgen.out(" lda $otherName | sta cx16.r0") + asmgen.out(" lda $otherName | sta math.multiply_words.multiplier") if(asmgen.isTargetCpu(CpuType.CPU65c02)) - asmgen.out(" stz cx16.r0+1") + asmgen.out(" stz math.multiply_words.multiplier+1") else - asmgen.out(" lda #0 | sta cx16.r0+1") + asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1") } else { asmgen.out(" lda $otherName") asmgen.signExtendAYlsb(valueDt) - asmgen.out(" sta cx16.r0 | sty cx16.r0+1") + asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1") } asmgen.out(""" lda $name @@ -1930,31 +1930,16 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, "+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1") "-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1") "*" -> { - if(otherName=="cx16.r0") - asmgen.out(""" - lda $name - ldy $name+1 - jsr math.multiply_words - sta $name - sty $name+1""") - else if(name=="cx16.r0") - asmgen.out(""" - lda $otherName - ldy $otherName+1 - jsr math.multiply_words - sta $name - sty $name+1""") - else - asmgen.out(""" - lda $otherName - ldy $otherName+1 - sta cx16.r0 - sty cx16.r0+1 - lda $name - ldy $name+1 - jsr math.multiply_words - sta $name - sty $name+1""") + asmgen.out(""" + lda $otherName + ldy $otherName+1 + sta math.multiply_words.multiplier + sty math.multiply_words.multiplier+1 + lda $name + ldy $name+1 + jsr math.multiply_words + sta $name + sty $name+1""") } "/" -> { if(dt==DataType.WORD) { @@ -2135,8 +2120,8 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram, private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) { fun multiplyVarByWordInAY() { asmgen.out(""" - sta cx16.r0 - sty cx16.r0+1 + sta math.multiply_words.multiplier + sty math.multiply_words.multiplier+1 lda $name ldy $name+1 jsr math.multiply_words diff --git a/compiler/res/prog8lib/math.asm b/compiler/res/prog8lib/math.asm index a7c876992..2b63d1fd8 100644 --- a/compiler/res/prog8lib/math.asm +++ b/compiler/res/prog8lib/math.asm @@ -56,9 +56,12 @@ _multiplier = P8ZP_SCRATCH_REG multiply_words .proc ; -- multiply two 16-bit words into a 32-bit result (signed and unsigned) - ; input: A/Y = first 16-bit number, cx16.R0 = second 16-bit number - ; output: multiply_words.result == cx16.R0:R1, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY. - ; TODO: should not use R0 and R1 at all !!! result needs 4 consecutive bytes, so it can't be in zeropage at all... + ; input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number + ; output: multiply_words.result, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY. + + ; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array. + ; this routine could be faster if we could stick that into zeropage, + ; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)... ; mult62.a ; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958 @@ -73,9 +76,8 @@ multiply_words .proc ; Average cycles: ; 93 bytes -_multiplicand = P8ZP_SCRATCH_W1 ; 2 bytes -_multiplier = cx16.r0 ; 2 bytes -result = cx16.r0 ; 4 bytes (note: shares memory with multiplier) so is r0 and ALSO r1. +_multiplicand = P8ZP_SCRATCH_W2 ; 2 bytes +multiplier = result ; 16 bit x 16 bit unsigned multiply, 32 bit result ; @@ -175,6 +177,9 @@ _inner_loop2 lda result ldy result+1 rts + +result .byte 0,0,0,0 + .pend diff --git a/docs/source/todo.rst b/docs/source/todo.rst index b846a5398..aa42ffb74 100644 --- a/docs/source/todo.rst +++ b/docs/source/todo.rst @@ -1,10 +1,8 @@ TODO ==== +- fix on c64 target: examples/cube3d-float (broken since 9.3 with the evalstack removal) it works on x16 target, oddly enough. + More detailed and simpler code for this problem in floatproblem64.p8 -- fix: amiga example with noopt draws wrong lines, caused by "2x faster word multiplication routine" because it trashes r0 and r1 now - multiply_words in math.asm needs fixing. - -- fix: test all other things with noopt once again! (examples/c64 are all ok) - fix: search for TODO("swap operand order") - optimize: search for TODO optimize: don't use scratch var - prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm? diff --git a/floatproblem64.p8 b/floatproblem64.p8 new file mode 100644 index 000000000..746f071ab --- /dev/null +++ b/floatproblem64.p8 @@ -0,0 +1,32 @@ +%import textio +%import floats +%zeropage dontuse + +main { + sub start() { + float value1 = -0.8 + float value2 = 0.3 + float two = 2.0 + + float result = value1*two + value2*two ; TODO FIX: invalid result on c64, ok when the *two is removed or expression is split (it's not caused by pushFAC1/popFAC1) + floats.print_f(result) + txt.nl() + txt.print("-1 was expected\n\n") ; on C64: -1.1 is printed :( + + result = value2*two + value1*two ; swapped operands around, now it's suddenly fine on C64... + floats.print_f(result) + txt.nl() + txt.print("-1 was expected\n\n") ; on C64: correct value is printed + + + value1 = 0.8 + value2 = 0.3 + result = value1*two + value2*two + floats.print_f(result) + txt.nl() + txt.print("2.2 was expected\n\n") ; on C64: correct value is printed + + repeat { + } + } +}