mirror of
https://github.com/irmen/prog8.git
synced 2025-11-02 13:16:07 +00:00
fix word multiplication to not clobber r0 and r1 anymore
This was causing corruption in certain programs, such as the cx16/amiga example. The problem was introduced in 9.4 with the new multiply_words routine.
This commit is contained in:
@@ -543,7 +543,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
|
|||||||
return true
|
return true
|
||||||
}
|
}
|
||||||
in WordDatatypes -> {
|
in WordDatatypes -> {
|
||||||
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
|
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
|
||||||
asmgen.out(" jsr math.multiply_words")
|
asmgen.out(" jsr math.multiply_words")
|
||||||
assignRegisterpairWord(target, RegisterOrPair.AY)
|
assignRegisterpairWord(target, RegisterOrPair.AY)
|
||||||
return true
|
return true
|
||||||
@@ -567,7 +567,7 @@ internal class AssignmentAsmGen(private val program: PtProgram,
|
|||||||
asmgen.out(" jsr math.mul_word_${value}")
|
asmgen.out(" jsr math.mul_word_${value}")
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "cx16.r0")
|
asmgen.assignWordOperandsToAYAndVar(expr.right, expr.left, "math.multiply_words.multiplier")
|
||||||
asmgen.out(" jsr math.multiply_words")
|
asmgen.out(" jsr math.multiply_words")
|
||||||
}
|
}
|
||||||
assignRegisterpairWord(target, RegisterOrPair.AY)
|
assignRegisterpairWord(target, RegisterOrPair.AY)
|
||||||
|
|||||||
@@ -1335,9 +1335,9 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
|
|||||||
} else {
|
} else {
|
||||||
asmgen.out("""
|
asmgen.out("""
|
||||||
lda $name
|
lda $name
|
||||||
sta cx16.r0
|
sta math.multiply_words.multiplier
|
||||||
lda $name+1
|
lda $name+1
|
||||||
sta cx16.r0+1
|
sta math.multiply_words.multiplier+1
|
||||||
lda #<$value
|
lda #<$value
|
||||||
ldy #>$value
|
ldy #>$value
|
||||||
jsr math.multiply_words
|
jsr math.multiply_words
|
||||||
@@ -1786,15 +1786,15 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
|
|||||||
}
|
}
|
||||||
"*" -> {
|
"*" -> {
|
||||||
if(valueDt==DataType.UBYTE) {
|
if(valueDt==DataType.UBYTE) {
|
||||||
asmgen.out(" lda $otherName | sta cx16.r0")
|
asmgen.out(" lda $otherName | sta math.multiply_words.multiplier")
|
||||||
if(asmgen.isTargetCpu(CpuType.CPU65c02))
|
if(asmgen.isTargetCpu(CpuType.CPU65c02))
|
||||||
asmgen.out(" stz cx16.r0+1")
|
asmgen.out(" stz math.multiply_words.multiplier+1")
|
||||||
else
|
else
|
||||||
asmgen.out(" lda #0 | sta cx16.r0+1")
|
asmgen.out(" lda #0 | sta math.multiply_words.multiplier+1")
|
||||||
} else {
|
} else {
|
||||||
asmgen.out(" lda $otherName")
|
asmgen.out(" lda $otherName")
|
||||||
asmgen.signExtendAYlsb(valueDt)
|
asmgen.signExtendAYlsb(valueDt)
|
||||||
asmgen.out(" sta cx16.r0 | sty cx16.r0+1")
|
asmgen.out(" sta math.multiply_words.multiplier | sty math.multiply_words.multiplier+1")
|
||||||
}
|
}
|
||||||
asmgen.out("""
|
asmgen.out("""
|
||||||
lda $name
|
lda $name
|
||||||
@@ -1930,31 +1930,16 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
|
|||||||
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
|
"+" -> asmgen.out(" lda $name | clc | adc $otherName | sta $name | lda $name+1 | adc $otherName+1 | sta $name+1")
|
||||||
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
|
"-" -> asmgen.out(" lda $name | sec | sbc $otherName | sta $name | lda $name+1 | sbc $otherName+1 | sta $name+1")
|
||||||
"*" -> {
|
"*" -> {
|
||||||
if(otherName=="cx16.r0")
|
asmgen.out("""
|
||||||
asmgen.out("""
|
lda $otherName
|
||||||
lda $name
|
ldy $otherName+1
|
||||||
ldy $name+1
|
sta math.multiply_words.multiplier
|
||||||
jsr math.multiply_words
|
sty math.multiply_words.multiplier+1
|
||||||
sta $name
|
lda $name
|
||||||
sty $name+1""")
|
ldy $name+1
|
||||||
else if(name=="cx16.r0")
|
jsr math.multiply_words
|
||||||
asmgen.out("""
|
sta $name
|
||||||
lda $otherName
|
sty $name+1""")
|
||||||
ldy $otherName+1
|
|
||||||
jsr math.multiply_words
|
|
||||||
sta $name
|
|
||||||
sty $name+1""")
|
|
||||||
else
|
|
||||||
asmgen.out("""
|
|
||||||
lda $otherName
|
|
||||||
ldy $otherName+1
|
|
||||||
sta cx16.r0
|
|
||||||
sty cx16.r0+1
|
|
||||||
lda $name
|
|
||||||
ldy $name+1
|
|
||||||
jsr math.multiply_words
|
|
||||||
sta $name
|
|
||||||
sty $name+1""")
|
|
||||||
}
|
}
|
||||||
"/" -> {
|
"/" -> {
|
||||||
if(dt==DataType.WORD) {
|
if(dt==DataType.WORD) {
|
||||||
@@ -2135,8 +2120,8 @@ internal class AugmentableAssignmentAsmGen(private val program: PtProgram,
|
|||||||
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
|
private fun inplacemodificationWordWithValue(name: String, dt: DataType, operator: String, value: PtExpression) {
|
||||||
fun multiplyVarByWordInAY() {
|
fun multiplyVarByWordInAY() {
|
||||||
asmgen.out("""
|
asmgen.out("""
|
||||||
sta cx16.r0
|
sta math.multiply_words.multiplier
|
||||||
sty cx16.r0+1
|
sty math.multiply_words.multiplier+1
|
||||||
lda $name
|
lda $name
|
||||||
ldy $name+1
|
ldy $name+1
|
||||||
jsr math.multiply_words
|
jsr math.multiply_words
|
||||||
|
|||||||
@@ -56,9 +56,12 @@ _multiplier = P8ZP_SCRATCH_REG
|
|||||||
|
|
||||||
multiply_words .proc
|
multiply_words .proc
|
||||||
; -- multiply two 16-bit words into a 32-bit result (signed and unsigned)
|
; -- multiply two 16-bit words into a 32-bit result (signed and unsigned)
|
||||||
; input: A/Y = first 16-bit number, cx16.R0 = second 16-bit number
|
; input: A/Y = first 16-bit number, multiply_words.multiplier = second 16-bit number
|
||||||
; output: multiply_words.result == cx16.R0:R1, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY.
|
; output: multiply_words.result, 4-bytes/32-bits product, LSB order (low-to-high) low 16 bits also in AY.
|
||||||
; TODO: should not use R0 and R1 at all !!! result needs 4 consecutive bytes, so it can't be in zeropage at all...
|
|
||||||
|
; NOTE: the result (which includes the multiplier parameter on entry) is a 4-byte array.
|
||||||
|
; this routine could be faster if we could stick that into zeropage,
|
||||||
|
; but there currently is no way to use 4 consecutive bytes in ZP (without disabling irq and saving/restoring them)...
|
||||||
|
|
||||||
; mult62.a
|
; mult62.a
|
||||||
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
|
; based on Dr Jefyll, http://forum.6502.org/viewtopic.php?f=9&t=689&start=0#p19958
|
||||||
@@ -73,9 +76,8 @@ multiply_words .proc
|
|||||||
; Average cycles:
|
; Average cycles:
|
||||||
; 93 bytes
|
; 93 bytes
|
||||||
|
|
||||||
_multiplicand = P8ZP_SCRATCH_W1 ; 2 bytes
|
_multiplicand = P8ZP_SCRATCH_W2 ; 2 bytes
|
||||||
_multiplier = cx16.r0 ; 2 bytes
|
multiplier = result
|
||||||
result = cx16.r0 ; 4 bytes (note: shares memory with multiplier) so is r0 and ALSO r1.
|
|
||||||
|
|
||||||
; 16 bit x 16 bit unsigned multiply, 32 bit result
|
; 16 bit x 16 bit unsigned multiply, 32 bit result
|
||||||
;
|
;
|
||||||
@@ -175,6 +177,9 @@ _inner_loop2
|
|||||||
lda result
|
lda result
|
||||||
ldy result+1
|
ldy result+1
|
||||||
rts
|
rts
|
||||||
|
|
||||||
|
result .byte 0,0,0,0
|
||||||
|
|
||||||
.pend
|
.pend
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,10 +1,8 @@
|
|||||||
TODO
|
TODO
|
||||||
====
|
====
|
||||||
|
- fix on c64 target: examples/cube3d-float (broken since 9.3 with the evalstack removal). Oddly enough, it works on the x16 target.
|
||||||
|
A more detailed but simpler reproduction of this problem is in floatproblem64.p8
|
||||||
|
|
||||||
- fix: amiga example with noopt draws wrong lines, caused by "2x faster word multiplication routine" because it trashes r0 and r1 now
|
|
||||||
multiply_words in math.asm needs fixing.
|
|
||||||
|
|
||||||
- fix: test all other things with noopt once again! (examples/c64 are all ok)
|
|
||||||
- fix: search for TODO("swap operand order")
|
- fix: search for TODO("swap operand order")
|
||||||
- optimize: search for TODO optimize: don't use scratch var
|
- optimize: search for TODO optimize: don't use scratch var
|
||||||
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?
|
- prefix prog8 subroutines with p8s_ instead of p8_ to not let them clash with variables in the asm?
|
||||||
|
|||||||
32
floatproblem64.p8
Normal file
32
floatproblem64.p8
Normal file
@@ -0,0 +1,32 @@
|
|||||||
|
%import textio
|
||||||
|
%import floats
|
||||||
|
%zeropage dontuse
|
||||||
|
|
||||||
|
main {
|
||||||
|
sub start() {
|
||||||
|
float value1 = -0.8
|
||||||
|
float value2 = 0.3
|
||||||
|
float two = 2.0
|
||||||
|
|
||||||
|
float result = value1*two + value2*two ; TODO FIX: invalid result on c64, ok when the *two is removed or expression is split (it's not caused by pushFAC1/popFAC1)
|
||||||
|
floats.print_f(result)
|
||||||
|
txt.nl()
|
||||||
|
txt.print("-1 was expected\n\n") ; on C64: -1.1 is printed :(
|
||||||
|
|
||||||
|
result = value2*two + value1*two ; swapped operands around, now it's suddenly fine on C64...
|
||||||
|
floats.print_f(result)
|
||||||
|
txt.nl()
|
||||||
|
txt.print("-1 was expected\n\n") ; on C64: correct value is printed
|
||||||
|
|
||||||
|
|
||||||
|
value1 = 0.8
|
||||||
|
value2 = 0.3
|
||||||
|
result = value1*two + value2*two
|
||||||
|
floats.print_f(result)
|
||||||
|
txt.nl()
|
||||||
|
txt.print("2.2 was expected\n\n") ; on C64: correct value is printed
|
||||||
|
|
||||||
|
repeat {
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
Reference in New Issue
Block a user