added optimized integer square (x*x) routine

This commit is contained in:
Irmen de Jong 2021-03-08 23:08:47 +01:00
parent e4bca5fe47
commit 60e169bd87
4 changed files with 155 additions and 2 deletions

View File

@ -1541,3 +1541,71 @@ _negative lsr a
rts
.pend
square .proc
; -- calculate the square (x*x) of the signed word in AY, result in AY
; routine by Lee Davison, source: http://6502.org/source/integers/square.htm
; using this routine is about twice as fast as doing a regular multiplication.
;
; Calculates the 16 bit unsigned integer square of the signed 16 bit integer in
; Numberl/Numberh. The result is always in the range 0 to 65025 and is held in
; Squarel/Squareh
;
; The maximum input range is only +/-255 and no checking is done to ensure that
; this is so: only the low byte is squared, the high byte is used solely as the sign.
;
; This routine is useful if you are trying to draw circles as for any circle
;
; x^2+y^2=r^2 where x and y are the co-ordinates of any point on the circle and
; r is the circle radius
;
; input: A = low byte, Y = high byte of the number to square
; output: A = low byte, Y = high byte of the square
; X is saved in P8ZP_SCRATCH_REG on entry and restored before returning.
; overwrites: P8ZP_SCRATCH_W1, P8ZP_SCRATCH_W2, P8ZP_SCRATCH_B1, P8ZP_SCRATCH_REG
numberl = P8ZP_SCRATCH_W1 ; number to square low byte
numberh = P8ZP_SCRATCH_W1+1 ; number to square high byte
squarel = P8ZP_SCRATCH_W2 ; square low byte
squareh = P8ZP_SCRATCH_W2+1 ; square high byte
tempsq = P8ZP_SCRATCH_B1 ; temp byte for intermediate result
sta numberl
sty numberh
stx P8ZP_SCRATCH_REG ; X is used as the bit counter below, so save it
lda #$00 ; clear a
sta squarel ; clear square low byte
; (no need to clear the high byte, it gets shifted out)
lda numberl ; get number low byte
ldx numberh ; get number high byte (only loaded to set the N flag for the sign test)
bpl _nonneg ; if +ve don't negate it
; else do a two's complement
eor #$ff ; invert
sec ; +1
adc #$00 ; and add it
_nonneg:
sta tempsq ; save abs(number)
ldx #$08 ; set bit count
; shift-and-add loop: walks the 8 bits of abs(number) in A from the top bit down,
; doubling the partial square each pass and adding abs(number) when the bit is set
_nextr2bit:
asl squarel ; low byte *2
rol squareh ; high byte *2+carry from low
asl a ; shift number byte
bcc _nosqadd ; don't do add if c = 0
tay ; save a
clc ; clear carry for add
lda tempsq ; get number
adc squarel ; add number^2 low byte
sta squarel ; save number^2 low byte
lda #$00 ; clear a
adc squareh ; add number^2 high byte
sta squareh ; save number^2 high byte
tya ; get a back
_nosqadd:
dex ; decrement bit count
bne _nextr2bit ; go do next bit
; return the 16 bit square in AY and restore the caller's X
lda squarel
ldy squareh
ldx P8ZP_SCRATCH_REG
rts
.pend

View File

@ -1746,6 +1746,13 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
}
}
"*" -> {
if(leftDt in IntegerDatatypes && rightDt in IntegerDatatypes) {
val leftVar = expr.left as? IdentifierReference
val rightVar = expr.right as? IdentifierReference
if(leftVar!=null && rightVar!=null && leftVar==rightVar)
return translateSquared(leftVar, leftDt)
}
val value = expr.right.constValue(program)
if(value!=null) {
if(rightDt in IntegerDatatypes) {
@ -1842,6 +1849,22 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
}
}
/**
 * Emits assembly that squares [variable] through the `math.square` library routine
 * and stores the 16-bit result at `P8ESTACK_LO,x` / `P8ESTACK_HI,x`, then decrements X.
 *
 * Byte-sized variables are loaded into A and widened with `asmgen.signExtendAYlsb`;
 * word-sized variables are loaded into A (low byte) and Y (high byte) unchanged.
 *
 * NOTE(review): the header of `math.square` states that its input range is only +/-255
 * and that no checking is done. The word branch passes the full 16-bit variable, so
 * values outside that range presumably do not yield `x*x` -- confirm, and consider
 * restricting this shortcut or falling back to the general multiplication for words.
 *
 * @param variable the identifier that is multiplied by itself
 * @param dt the datatype of [variable]; must be BYTE, UBYTE, WORD or UWORD
 * @throws AssemblyError when [dt] is not one of the integer datatypes listed above
 */
private fun translateSquared(variable: IdentifierReference, dt: DataType) {
    val asmVar = asmgen.asmVariableName(variable)

    // Load the operand into AY and call the squaring routine.
    if(dt == DataType.BYTE || dt == DataType.UBYTE) {
        asmgen.out(" lda $asmVar")
        asmgen.signExtendAYlsb(dt)
        asmgen.out(" jsr math.square")
    } else if(dt == DataType.UWORD || dt == DataType.WORD) {
        asmgen.out(" lda $asmVar | ldy $asmVar+1 | jsr math.square")
    } else {
        throw AssemblyError("require integer dt for square")
    }

    // Store the result that math.square returned in AY into the stack slot at X.
    asmgen.out(" sta P8ESTACK_LO,x | tya | sta P8ESTACK_HI,x | dex")
}
private fun translateExpression(expr: PrefixExpression) {
translateExpression(expr.expression)
val itype = expr.inferType(program)

View File

@ -739,9 +739,11 @@ sin16(x)
sqrt16(w)
16 bit unsigned integer Square root. Result is unsigned byte.
To do the reverse, squaring an integer, just write ``x*x``.
sqrt(x)
Floating point Square root.
To do the reverse, squaring a floating point number, just write ``x*x`` or ``x**2``.
tan(x)
Tangent.

View File

@ -1,9 +1,69 @@
%import textio
%zeropage basicsafe
%import floats
%import test_stack
%zeropage dontuse
; TODO fix float conversion crashes on Cx16 (ubyte as float, uword as float)
; Test/benchmark program for the integer squaring (x*x) code path.
main {
sub start() {
txt.print("hello")
uword total=0
uword xx
; fl, fltotal and ub are only referenced by the commented-out float benchmarks below
float fl
float fltotal=0.0
ubyte ub = 22
; print the squares of 0..100, separated by commas
for xx in 0 to 100 {
txt.print_uw(xx*xx)
txt.chrout(',')
}
txt.nl()
; timing run: reset the timer, then accumulate xx*xx for xx = 1..255, five times over
; NOTE(review): the accumulated sum exceeds the 16-bit range of a uword, so the printed
; total is presumably the wrapped value -- confirm this is intended for the benchmark
total = 0
c64.SETTIM(0,0,0)
repeat 5 {
for xx in 1 to 255 {
total += xx*xx
}
}
txt.print_uw(total)
txt.nl()
; presumably the timer value elapsed since the SETTIM call above -- verify against c64.RDTIM16
txt.print_uw(c64.RDTIM16())
txt.nl()
txt.nl()
; presumably verifies the evaluation stack state after the tests -- verify against test_stack
test_stack.test()
; disabled float benchmark #1: squaring via fl * fl
; fltotal=0.0
; c64.SETTIM(0,0,0)
; repeat 5 {
; for xx in 1 to 255 {
; fl = xx as float
; ; fl = ub as float
; fltotal = fl * fl
; }
; }
;
; floats.print_f(fltotal)
; txt.nl()
; txt.print_uw(c64.RDTIM16())
; txt.nl()
; txt.nl()
;
; disabled float benchmark #2: squaring via fl ** 2
; fltotal=0.0
; c64.SETTIM(0,0,0)
; repeat 5 {
; for xx in 1 to 255 {
; fl = xx as float
; ; fl = ub as float
; fltotal = fl ** 2
; }
; }
;
; floats.print_f(fltotal)
; txt.nl()
; txt.print_uw(c64.RDTIM16())
; txt.nl()
; txt.nl()
}
}