added optimized integer square (x*x) routine

2025-02-18 20:30:43 +00:00 · 2021-03-08 23:08:47 +01:00 · 2021-03-08 23:08:47 +01:00 · 60e169bd87
commit 60e169bd87
parent e4bca5fe47
4 changed files with 155 additions and 2 deletions
--- a/compiler/res/prog8lib/math.asm
+++ b/compiler/res/prog8lib/math.asm
@ -1541,3 +1541,71 @@ _negative	lsr  a
 		rts
 		.pend
 square          .proc
 ; -- calculate the square (x*x) of the signed word in AY, result in AY
 ;    (A = low byte, Y = high byte, both for the input and for the result)
 ; routine by Lee Davison, source: http://6502.org/source/integers/square.htm
 ; using this routine is about twice as fast as doing a regular multiplication.
 ;
 ; Calculates the 16 bit unsigned integer square of the signed 16 bit integer in
 ; Numberl/Numberh.  The result is always in the range 0 to 65025 and is held in
 ; Squarel/Squareh
 ;
 ; The maximum input range is only +/-255 and no checking is done to ensure that
 ; this is so.
 ; Only the low byte takes part in the multiplication; the high byte is looked at
 ; for its sign alone, so an input outside +/-255 does NOT yield x*x.
 ;
 ; This routine is useful if you are trying to draw circles, as for any circle
 ;
 ; x^2+y^2=r^2 where x and y are the co-ordinates of any point on the circle and
 ; r is the circle radius
 ;
 ; X is saved in P8ZP_SCRATCH_REG on entry and reloaded from it before returning.
 ; Writes P8ZP_SCRATCH_W1, P8ZP_SCRATCH_W2, P8ZP_SCRATCH_B1 and P8ZP_SCRATCH_REG.
 numberl = P8ZP_SCRATCH_W1       ; number to square low byte
 numberh = P8ZP_SCRATCH_W1+1     ; number to square high byte
 squarel = P8ZP_SCRATCH_W2       ; square low byte
 squareh = P8ZP_SCRATCH_W2+1     ; square high byte
 tempsq = P8ZP_SCRATCH_B1        ; temp byte for intermediate result
 ; store the AY argument and keep X, which is used as the bit counter below
 	sta  numberl
 	sty  numberh
 	stx  P8ZP_SCRATCH_REG
        lda     #$00        ; clear a
        sta     squarel     ; clear square low byte
                            ; (no need to clear the high byte, its old contents
                            ;  are shifted out by the eight rol's in the loop)
        lda	numberl     ; get number low byte
 	ldx	numberh     ; get number high byte (loaded only to set the N flag)
 	bpl	_nonneg      ; if +ve don't negate it
                            ; else do a two's complement of the low byte
 	eor	#$ff        ; invert
        sec	            ; +1
 	adc	#$00        ; and add it
 _nonneg:
 	sta	tempsq      ; save abs(number)
 	ldx	#$08        ; set bit count
 ; shift-and-add multiply: A holds the remaining multiplier bits (msb first),
 ; tempsq is the multiplicand, squarel/squareh accumulate the product
 _nextr2bit:
 	asl	squarel     ; low byte *2
 	rol	squareh     ; high byte *2+carry from low
 	asl	a           ; shift number byte, next multiplier bit goes into carry
 	bcc	_nosqadd     ; don't do add if c = 0
 	tay                 ; save a (remaining multiplier bits) in y
 	clc                 ; clear carry for add
 	lda	tempsq      ; get number
 	adc	squarel     ; add number^2 low byte
 	sta	squarel     ; save number^2 low byte
 	lda	#$00        ; clear a
 	adc	squareh     ; add number^2 high byte (propagates the carry)
 	sta	squareh     ; save number^2 high byte
 	tya                 ; get a back
 _nosqadd:
 	dex                 ; decrement bit count
 	bne	_nextr2bit   ; go do next bit
 ; return the 16 bit square in AY and give X back to the caller
 	lda  squarel
 	ldy  squareh
 	ldx  P8ZP_SCRATCH_REG
 	rts
 		.pend
--- a/compiler/src/prog8/compiler/target/cpu6502/codegen/ExpressionsAsmGen.kt
+++ b/compiler/src/prog8/compiler/target/cpu6502/codegen/ExpressionsAsmGen.kt
@ -1746,6 +1746,13 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
                }
            }
            "*" -> {
                if(leftDt in IntegerDatatypes && rightDt in IntegerDatatypes) {
                    val leftVar = expr.left as? IdentifierReference
                    val rightVar = expr.right as? IdentifierReference
                    if(leftVar!=null && rightVar!=null && leftVar==rightVar)
                        return translateSquared(leftVar, leftDt)
                }
                val value = expr.right.constValue(program)
                if(value!=null) {
                    if(rightDt in IntegerDatatypes) {
@ -1842,6 +1849,22 @@ internal class ExpressionsAsmGen(private val program: Program, private val asmge
        }
    }
    private fun translateSquared(variable: IdentifierReference, dt: DataType) {
        val asmVar = asmgen.asmVariableName(variable)
        when(dt) {
            DataType.BYTE, DataType.UBYTE -> {
                asmgen.out("  lda  $asmVar")
                asmgen.signExtendAYlsb(dt)
                asmgen.out("  jsr  math.square")
            }
            DataType.UWORD, DataType.WORD -> {
                asmgen.out("  lda  $asmVar |  ldy  $asmVar+1 |  jsr  math.square")
            }
            else -> throw AssemblyError("require integer dt for square")
        }
        asmgen.out("  sta  P8ESTACK_LO,x |  tya |  sta  P8ESTACK_HI,x |  dex")
    }
    private fun translateExpression(expr: PrefixExpression) {
        translateExpression(expr.expression)
        val itype = expr.inferType(program)
--- a/docs/source/programming.rst
+++ b/docs/source/programming.rst
@ -739,9 +739,11 @@ sin16(x)
 sqrt16(w)
    16 bit unsigned integer Square root. Result is unsigned byte.
    To do the reverse, squaring an integer, just write ``x*x``.
 sqrt(x)
    Floating point Square root.
    To do the reverse, squaring a floating point number, just write ``x*x`` or ``x**2``.
 tan(x)
    Tangent.
--- a/examples/test.p8
+++ b/examples/test.p8
@ -1,9 +1,69 @@
 %import textio
-%zeropage basicsafe
+%import floats
 %import test_stack
 %zeropage dontuse
 ; TODO fix float conversion crashes on Cx16  (ubyte as float,  uword as float)
 ; Scratch test program for the x*x expression:
 ; prints xx*xx for xx in 0..100, then sums xx*xx for xx in 1..255 five times over
 ; and prints the sum followed by the value returned by c64.RDTIM16().
 main {
    sub start() {
-        txt.print("hello")
+        uword total=0
        uword xx
        ; fl, fltotal and ub are only referenced by the commented-out float
        ; benchmarks further down
        float fl
        float fltotal=0.0
        ubyte ub = 22
        ; print the squares of 0..100, separated by commas
        for xx in 0 to 100 {
            txt.print_uw(xx*xx)
            txt.chrout(',')
        }
        txt.nl()
        total = 0
        ; presumably resets the timer that RDTIM16 reads back below - TODO confirm
        c64.SETTIM(0,0,0)
        ; xx stays within 1..255 here, i.e. inside the +/-255 input range that the
        ; math.square routine documents
        ; NOTE(review): the sum of these squares exceeds 65535, so total (a uword)
        ; presumably wraps around; looks intentional for a timing loop - confirm
        repeat 5 {
            for xx in 1 to 255 {
                total += xx*xx
            }
        }
        txt.print_uw(total)
        txt.nl()
        txt.print_uw(c64.RDTIM16())
        txt.nl()
        txt.nl()
        test_stack.test()
 ; the two blocks below are disabled float variants of the same timing loop,
 ; one using fl * fl and one using fl ** 2
 ;        fltotal=0.0
 ;        c64.SETTIM(0,0,0)
 ;        repeat 5 {
 ;            for xx in 1 to 255 {
 ;                fl = xx as float
 ;                ; fl = ub as float
 ;                fltotal = fl * fl
 ;            }
 ;        }
 ;
 ;        floats.print_f(fltotal)
 ;        txt.nl()
 ;        txt.print_uw(c64.RDTIM16())
 ;        txt.nl()
 ;        txt.nl()
 ;
 ;        fltotal=0.0
 ;        c64.SETTIM(0,0,0)
 ;        repeat 5 {
 ;            for xx in 1 to 255 {
 ;                fl = xx as float
 ;                ; fl = ub as float
 ;                fltotal = fl ** 2
 ;            }
 ;        }
 ;
 ;        floats.print_f(fltotal)
 ;        txt.nl()
 ;        txt.print_uw(c64.RDTIM16())
 ;        txt.nl()
 ;        txt.nl()
    }
 }