mirror of
https://github.com/irmen/prog8.git
synced 2024-11-22 15:33:02 +00:00
slightly faster sqrt() routine for integers
This commit is contained in:
parent
d8991894e3
commit
bbb6c53457
@ -4,7 +4,8 @@
|
||||
; some more interesting routines can be found here:
|
||||
; http://6502org.wikidot.com/software-math
|
||||
; http://codebase64.org/doku.php?id=base:6502_6510_maths
|
||||
;
|
||||
; https://github.com/TobyLobster/multiply_test
|
||||
; https://github.com/TobyLobster/sqrt_test
|
||||
|
||||
|
||||
multiply_bytes .proc
|
||||
|
@ -68,11 +68,8 @@ asmsub RDTIM() -> ubyte @ A, ubyte @ X, ubyte @ Y {
|
||||
asmsub RDTIM16() clobbers(X) -> uword @AY {
|
||||
; -- like RDTIM() but only returning the lower 16 bits in AY for convenience
|
||||
%asm {{
|
||||
jsr cbm.RDTIM
|
||||
pha
|
||||
txa
|
||||
tay
|
||||
pla
|
||||
lda TIME_LO
|
||||
ldy TIME_MID
|
||||
rts
|
||||
}}
|
||||
}
|
||||
@ -127,7 +124,7 @@ asmsub cleanup_at_exit() {
|
||||
|
||||
asmsub waitvsync() clobbers(A) {
|
||||
; --- busy wait till the next vsync has occurred (approximately), without depending on custom irq handling.
|
||||
; TODO: on PET this now simply waits until the next jiffy clock update
|
||||
; Note: on PET this simply waits until the next jiffy clock update, I don't know if a true vsync is possible there
|
||||
%asm {{
|
||||
lda #1
|
||||
ldy #0
|
||||
|
@ -127,32 +127,41 @@ _possibly_zero cmp #0
|
||||
|
||||
|
||||
func_sqrt16_into_A .proc
|
||||
; integer square root from http://6502org.wikidot.com/software-math-sqrt
|
||||
sta P8ZP_SCRATCH_W1
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
lda #0
|
||||
sta P8ZP_SCRATCH_B1
|
||||
sta P8ZP_SCRATCH_REG
|
||||
ldx #8
|
||||
- sec
|
||||
lda P8ZP_SCRATCH_W1+1
|
||||
; integer square root
|
||||
; http://6502org.wikidot.com/software-math-sqrt
|
||||
; https://github.com/TobyLobster/sqrt_test/blob/main/sqrt/sqrt7.a
|
||||
; Tweaked by TobyLobster and 0xC0DE to be smaller and faster
|
||||
_numl = P8ZP_SCRATCH_W1
|
||||
_numh = P8ZP_SCRATCH_W1+1
|
||||
_loop_counter = P8ZP_SCRATCH_REG
|
||||
_root = P8ZP_SCRATCH_B1
|
||||
sta _numl
|
||||
sty _numh
|
||||
ldx #$ff
|
||||
stx _loop_counter
|
||||
inx
|
||||
stx _root
|
||||
sec
|
||||
_loop lda _numh
|
||||
sbc #$40
|
||||
tay
|
||||
lda P8ZP_SCRATCH_REG
|
||||
sbc P8ZP_SCRATCH_B1
|
||||
txa
|
||||
sbc _root
|
||||
bcc +
|
||||
sty P8ZP_SCRATCH_W1+1
|
||||
sta P8ZP_SCRATCH_REG
|
||||
+ rol P8ZP_SCRATCH_B1
|
||||
asl P8ZP_SCRATCH_W1
|
||||
rol P8ZP_SCRATCH_W1+1
|
||||
rol P8ZP_SCRATCH_REG
|
||||
asl P8ZP_SCRATCH_W1
|
||||
rol P8ZP_SCRATCH_W1+1
|
||||
rol P8ZP_SCRATCH_REG
|
||||
dex
|
||||
bne -
|
||||
lda P8ZP_SCRATCH_B1
|
||||
sty _numh
|
||||
bcs ++
|
||||
+ txa
|
||||
+ rol _root
|
||||
asl _numl
|
||||
rol _numh
|
||||
rol a
|
||||
asl _numl
|
||||
rol _numh
|
||||
rol a
|
||||
tax
|
||||
lsr _loop_counter
|
||||
bne _loop
|
||||
lda _root
|
||||
rts
|
||||
.pend
|
||||
|
||||
|
@ -14,7 +14,7 @@ Currently these machines can be selected as a compilation target (via the ``-tar
|
||||
- 'c64': the Commodore 64
|
||||
- 'cx16': the `Commander X16 <https://www.commanderx16.com/>`_
|
||||
- 'c128': the Commodore 128 (*limited support*)
|
||||
- 'pet32': the Commodore PET 4032 (*experimental support*)
|
||||
- 'pet32': the Commodore PET 4032 (*limited support*)
|
||||
- 'atari': the Atari 800 XL (*experimental support*)
|
||||
- 'virtual': a builtin virtual machine
|
||||
|
||||
|
@ -1,7 +1,8 @@
|
||||
TODO
|
||||
====
|
||||
|
||||
- check mult and sqrt routines with the benchmarked ones on https://github.com/TobyLobster/sqrt_test / https://github.com/TobyLobster/multiply_test
|
||||
- don't allow txt.print('@') if possible, don't cast up a byte to str
|
||||
- check mult routines with the benchmarked ones on https://github.com/TobyLobster/multiply_test
|
||||
- is math.square still the fastest after this? (now used for word*word)
|
||||
- [on branch:] investigate McCarthy evaluation again? this may also reduce code size perhaps for things like if a>4 or a<2 ....
|
||||
- IR: reduce the number of branch instructions such as BEQ, BEQR, etc (gradually), replace with CMP(I) + status branch instruction
|
||||
|
@ -14,7 +14,7 @@
|
||||
<keywords keywords="&;->;@;and;as;asmsub;break;clobbers;do;downto;else;false;for;goto;if;if_cc;if_cs;if_eq;if_mi;if_ne;if_neg;if_nz;if_pl;if_pos;if_vc;if_vs;if_z;in;inline;not;or;repeat;return;romsub;step;sub;to;true;unroll;until;when;while;xor;~" ignore_case="false" />
|
||||
<keywords2 keywords="%address;%asm;%asmbinary;%asminclude;%breakpoint;%import;%ir;%launcher;%option;%output;%zeropage;%zpreserved;iso:;petscii:;sc:" />
|
||||
<keywords3 keywords="@requirezp;@shared;@split;@zp;bool;byte;const;float;str;ubyte;uword;void;word" />
|
||||
<keywords4 keywords="abs;all;any;callfar;callram;callrom;clamp;cmp;divmod;len;lsb;max;memory;min;mkword;msb;peek;peekw;poke;pokew;pop;popw;push;pushw;reverse;rol;rol2;ror;ror2;rrestore;rrestorex;rsave;rsavex;sgn;sizeof;sort;sqrt;sqrt16;swap;|>" />
|
||||
<keywords4 keywords="abs;all;any;callfar;callram;callrom;clamp;cmp;divmod;len;lsb;max;memory;min;mkword;msb;peek;peekw;poke;pokew;pop;popw;push;pushw;reverse;rol;rol2;ror;ror2;rrestore;rrestorex;rsave;rsavex;sgn;sizeof;sort;sqrt;swap;|>" />
|
||||
</highlighting>
|
||||
<extensionMap>
|
||||
<mapping ext="p8" />
|
||||
|
Loading…
Reference in New Issue
Block a user