added optimized integer square (x*x) routine

2025-02-02 19:32:21 +00:00 · 2021-03-08 23:08:47 +01:00 · 2021-03-08 23:08:47 +01:00 · 60e169bd87
commit 60e169bd87
parent e4bca5fe47
4 changed files with 155 additions and 2 deletions
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@ -1541,3 +1541,71 @@ _negative	lsr  a
 		rts
 		.pend

+
+square          .proc
+; -- calculate the square (x*x) of the signed word in AY, result (unsigned word) in AY
+; routine by Lee Davison, source: http://6502.org/source/integers/square.htm
+; using this routine is about twice as fast as doing a regular multiplication.
+;
+; Calculates the 16 bit unsigned integer square of the signed 16 bit integer in
+; Numberl/Numberh.  The result is always in the range 0 to 65025 and is held in
+; Squarel/Squareh
+;
+; The maximum input range is only +/-255 and no checking is done to ensure that
+; this is so.
+;
+; This routine is useful if you are trying to draw circles as for any circle
+;
+; x^2+y^2=r^2 where x and y are the co-ordinates of any point on the circle and
+; r is the circle radius
+
+numberl = P8ZP_SCRATCH_W1       ; number to square low byte
+numberh = P8ZP_SCRATCH_W1+1     ; number to square high byte
+squarel = P8ZP_SCRATCH_W2       ; square low byte
+squareh = P8ZP_SCRATCH_W2+1     ; square high byte
+tempsq = P8ZP_SCRATCH_B1        ; temp byte for intermediate result
+
+	sta  numberl            ; store the argument: A = low byte
+	sty  numberh            ; ... and Y = high byte
+	stx  P8ZP_SCRATCH_REG   ; save X, it is used as the bit counter below
+
+        lda     #$00        ; clear a
+        sta     squarel     ; clear square low byte
+                            ; (no need to clear the high byte, it gets shifted out)
+        lda	numberl     ; get number low byte
+	ldx	numberh     ; get number high byte (only its sign bit is used)
+	bpl	_nonneg      ; if +ve don't negate it
+                            ; else do a two's complement of the low byte in A
+	eor	#$ff        ; invert
+        sec	            ; +1
+	adc	#$00        ; and add it
+
+_nonneg:
+	sta	tempsq      ; save abs(number), low byte only
+	ldx	#$08        ; set bit count: one pass per bit of that byte
+
+_nextr2bit:
+	asl	squarel     ; low byte *2
+	rol	squareh     ; high byte *2+carry from low
+	asl	a           ; shift next bit of number (msb first) into carry
+	bcc	_nosqadd     ; don't do add if c = 0
+	tay                 ; save the shifted number in Y
+	clc                 ; clear carry for add
+	lda	tempsq      ; get number
+	adc	squarel     ; add number^2 low byte
+	sta	squarel     ; save number^2 low byte
+	lda	#$00        ; clear a
+	adc	squareh     ; add number^2 high byte (plus carry from low byte)
+	sta	squareh     ; save number^2 high byte
+	tya                 ; get the shifted number back
+
+_nosqadd:
+	dex                 ; decrement bit count
+	bne	_nextr2bit   ; go do next bit
+
+	lda  squarel            ; return the square in AY: A = low byte
+	ldy  squareh            ; ... and Y = high byte
+	ldx  P8ZP_SCRATCH_REG   ; restore the X that was saved on entry
+	rts                     ; numberl/h, squarel/h and tempsq have been overwritten
+
+		.pend
--- a/compiler/src/prog8/compiler/target/cpu6502/codegen/ExpressionsAsmGen.kt
+++ b/compiler/src/prog8/compiler/target/cpu6502/codegen/ExpressionsAsmGen.kt
@ -1746,6 +1746,13 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
                }
            }
            "*" -> {
+                if(leftDt in IntegerDatatypes && rightDt in IntegerDatatypes) {
+                    val leftVar = expr.left as? IdentifierReference
+                    val rightVar = expr.right as? IdentifierReference
+                    if(leftVar!=null && rightVar!=null && leftVar==rightVar)
+                        return translateSquared(leftVar, leftDt)
+                }
+
                val value = expr.right.constValue(program)
                if(value!=null) {
                    if(rightDt in IntegerDatatypes) {
@ -1842,6 +1849,22 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
        }
    }

+    /**
+     * Emits code that loads the integer [variable] into A/Y, calls `math.square`,
+     * and stores the A/Y result in the evaluation stack slot at index X.
+     *
+     * NOTE(review): the math.square routine documents a maximum input range of +/-255,
+     * so word operands outside that range presumably yield a wrong product -- confirm.
+     *
+     * @param variable the identifier that is being multiplied with itself
+     * @param dt datatype of [variable]; anything but a byte or word type is rejected
+     * @throws AssemblyError when [dt] is not one of BYTE, UBYTE, WORD, UWORD
+     */
+    private fun translateSquared(variable: IdentifierReference, dt: DataType) {
+        val name = asmgen.asmVariableName(variable)
+        if(dt == DataType.BYTE || dt == DataType.UBYTE) {
+            // byte operand goes into A; signExtendAYlsb() presumably fills Y to match dt
+            asmgen.out("  lda  $name")
+            asmgen.signExtendAYlsb(dt)
+            asmgen.out("  jsr  math.square")
+        } else if(dt == DataType.WORD || dt == DataType.UWORD) {
+            // word operand: low byte in A, high byte in Y
+            asmgen.out("  lda  $name |  ldy  $name+1 |  jsr  math.square")
+        } else {
+            throw AssemblyError("require integer dt for square")
+        }
+        // write A (low) and Y (high) to the stack slot at X, then decrement X
+        asmgen.out("  sta  P8ESTACK_LO,x |  tya |  sta  P8ESTACK_HI,x |  dex")
+    }
+
    private fun translateExpression(expr: PrefixExpression) {
        translateExpression(expr.expression)
        val itype = expr.inferType(program)
--- a/docs/source/programming.rst
+++ b/docs/source/programming.rst
@ -739,9 +739,11 @@ sin16(x)

 sqrt16(w)
    16 bit unsigned integer Square root. Result is unsigned byte.
+    To do the reverse, squaring an integer, just write ``x*x``.

 sqrt(x)
    Floating point Square root.
+    To do the reverse, squaring a floating point number, just write ``x*x`` or ``x**2``.

 tan(x)
    Tangent.
--- a/examples/test.p8
+++ b/examples/test.p8
@ -1,9 +1,69 @@
 %import textio
-%zeropage basicsafe
+%import floats
+%import test_stack
+%zeropage dontuse

+; TODO fix float conversion crashes on Cx16  (ubyte as float,  uword as float)

 main {
    sub start() {
-        txt.print("hello")
+        uword total=0           ; accumulator for the timed sum of squares
+        uword xx                ; loop variable that gets squared
+        float fl                ; only used by the commented-out float benchmarks below
+        float fltotal=0.0       ; only used by the commented-out float benchmarks below
+        ubyte ub = 22           ; only referenced in the commented-out code below
+
+        for xx in 0 to 100 {    ; print the squares of 0..100, separated by commas
+            txt.print_uw(xx*xx)
+            txt.chrout(',')
+        }
+        txt.nl()
+
+        total = 0
+        c64.SETTIM(0,0,0)       ; presumably resets the system timer before the timed loop
+        repeat 5 {              ; timed run: 5 passes over the squares of 1..255
+            for xx in 1 to 255 {
+                total += xx*xx
+            }
+        }
+        txt.print_uw(total)
+        txt.nl()
+        txt.print_uw(c64.RDTIM16())     ; presumably the timer ticks elapsed since SETTIM
+        txt.nl()
+        txt.nl()
+        test_stack.test()       ; NOTE(review): presumably checks the stack state -- confirm
+        ; float benchmarks (fl*fl and fl**2), presumably disabled due to the TODO at the top of this file
+;        fltotal=0.0
+;        c64.SETTIM(0,0,0)
+;        repeat 5 {
+;            for xx in 1 to 255 {
+;                fl = xx as float
+;                ; fl = ub as float
+;                fltotal = fl * fl
+;            }
+;        }
+;
+;        floats.print_f(fltotal)
+;        txt.nl()
+;        txt.print_uw(c64.RDTIM16())
+;        txt.nl()
+;        txt.nl()
+;
+;        fltotal=0.0
+;        c64.SETTIM(0,0,0)
+;        repeat 5 {
+;            for xx in 1 to 255 {
+;                fl = xx as float
+;                ; fl = ub as float
+;                fltotal = fl ** 2
+;            }
+;        }
+;
+;        floats.print_f(fltotal)
+;        txt.nl()
+;        txt.print_uw(c64.RDTIM16())
+;        txt.nl()
+;        txt.nl()
+
    }
 }