improved sqrt16

2024-10-19 07:23:56 +00:00 · 2020-11-01 05:45:49 +01:00 · 2020-11-01 05:45:49 +01:00 · 95c0425151
commit 95c0425151
parent 47cbc7b1f9
2 changed files with 51 additions and 66 deletions
--- a/compiler/res/prog8lib/prog8_lib.asm
+++ b/compiler/res/prog8lib/prog8_lib.asm
@@ -708,53 +708,40 @@ func_sqrt16_into_A	.proc
 		.pend
 func_sqrt16	.proc
-		; TODO is this one faster?  http://6502org.wikidot.com/software-math-sqrt
+		; integer square root from  http://6502org.wikidot.com/software-math-sqrt
 		txa
 		pha
 		lda  P8ESTACK_LO+1,x
-		sta  P8ZP_SCRATCH_W2
+		ldy  P8ESTACK_HI+1,x
-		lda  P8ESTACK_HI+1,x
+		sta  P8ZP_SCRATCH_W1
-		sta  P8ZP_SCRATCH_W2+1
+		sty  P8ZP_SCRATCH_W1+1
 		stx  P8ZP_SCRATCH_REG
 		ldy  #$00    ; r = 0
 		ldx  #$07
 		clc         ; clear bit 16 of m
 _loop
 		tya
 		ora  _stab-1,x
 		sta  P8ZP_SCRATCH_B1     ; (r asl 8) | (d asl 7)
 		lda  P8ZP_SCRATCH_W2+1
 		bcs  _skip0  ; m >= 65536? then t <= m is always true
 		cmp  P8ZP_SCRATCH_B1
 		bcc  _skip1  ; t <= m
 _skip0
 		sbc  P8ZP_SCRATCH_B1
 		sta  P8ZP_SCRATCH_W2+1     ; m = m - t
 		tya
 		ora  _stab,x
 		tay         ; r = r or d
 _skip1
 		asl  P8ZP_SCRATCH_W2
 		rol  P8ZP_SCRATCH_W2+1     ; m = m asl 1
 		dex
 		bne  _loop
 		; last iteration
 		bcs  _skip2
 		sty  P8ZP_SCRATCH_B1
 		lda  P8ZP_SCRATCH_W2
 		cmp  #$80
 		lda  P8ZP_SCRATCH_W2+1
 		sbc  P8ZP_SCRATCH_B1
 		bcc  _skip3
 _skip2
 		iny         ; r = r or d (d is 1 here)
 _skip3
 		ldx  P8ZP_SCRATCH_REG
 		tya
 		sta  P8ESTACK_LO+1,x
 		lda  #0
-		sta  P8ESTACK_HI+1,x
+		sta  P8ZP_SCRATCH_B1
 		sta  P8ZP_SCRATCH_REG
 		ldx  #8
 -		sec
 		lda  P8ZP_SCRATCH_W1+1
 		sbc  #$40
 		tay
 		lda  P8ZP_SCRATCH_REG
 		sbc  P8ZP_SCRATCH_B1
 		bcc  +
 		sty  P8ZP_SCRATCH_W1+1
 		sta  P8ZP_SCRATCH_REG
 +		rol  P8ZP_SCRATCH_B1
 		asl  P8ZP_SCRATCH_W1
 		rol  P8ZP_SCRATCH_W1+1
 		rol  P8ZP_SCRATCH_REG
 		asl  P8ZP_SCRATCH_W1
 		rol  P8ZP_SCRATCH_W1+1
 		rol  P8ZP_SCRATCH_REG
 		dex
 		bne  -
 		pla
 		tax
 		lda  P8ZP_SCRATCH_B1
 		sta  P8ESTACK_LO+1,x
 		rts
 _stab   .byte $01,$02,$04,$08,$10,$20,$40,$80
 		.pend
--- a/examples/test.p8
+++ b/examples/test.p8
@@ -1,5 +1,4 @@
 %import textio
 %import floats
 %import syslib
 %zeropage basicsafe
@@ -9,29 +8,28 @@
 main {
    sub start() {
-        float[]  fls = [1.1, 2.2, 0.0, 4.4, 3.3]
+        uword num
-        float fl
+        ubyte ss
        ubyte ii
-
+        num = 65535
-        fls[2] = sin(fls[0])
+        ss = sqrt16(num)
-        for ii in 0 to len(fls)-1 {
+        txt.print_ub(ss)
            floats.print_f(fls[ii])
            txt.chrout('\n')
        }
        txt.chrout('\n')
-        fls[3] = cos(fls[0])
+        num = 20000
-        for ii in 0 to len(fls)-1 {
+        ss = sqrt16(num)
-            floats.print_f(fls[ii])
+        txt.print_ub(ss)
        txt.chrout('\n')
        }
        num = 9999
        ss = sqrt16(num)
        txt.print_ub(ss)
        txt.chrout('\n')
-;        fl = getfloat()
+        num = 500
-;
+        ss = sqrt16(num)
-;        floats.print_f(fl)
+        txt.print_ub(ss)
-;        txt.chrout('\n')
+        txt.chrout('\n')
        testX()
    }
@@ -45,9 +43,9 @@ main {
        return "foobar"
    }
-;    sub getfloat() -> float {
+    sub getfloat() -> float {
-;        return 4.56789
+        return 123.456789
-;    }
+    }
    sub mcp(uword from, uword dest, ubyte length) {
        txt.print_uw(from)