mirror of
https://github.com/deater/dos33fsprogs.git
synced 2025-01-14 13:33:48 +00:00
b1238af49d
this will probably upset people
91 lines
2.2 KiB
ArmAsm
91 lines
2.2 KiB
ArmAsm
; Fast mutiply -- setup tables
|
|
|
|
|
|
; Note for our purposes we only care about 8.8 x 8.8 fixed point
|
|
; with 8.8 result, which means we only care about the middle two bytes
|
|
; of the 32 bit result. So we disable generation of the high and low byte
|
|
; to save some cycles.
|
|
|
|
;
|
|
; The old routine took around 700 cycles for a 16bitx16bit=32bit mutiply
|
|
; This routine, at an expense of 2kB of looku tables, takes around 250
|
|
; If you reuse a term the next time this drops closer to 200
|
|
|
|
; This routine was described by Stephen Judd and found
|
|
; in The Fridge and in the C=Hacking magazine
|
|
; http://codebase64.org/doku.php?id=base:seriously_fast_multiplication
|
|
|
|
; The key thing to note is that
|
|
; (a+b)^2 (a-b)^2
|
|
; a*b = ------- - --------
|
|
; 4 4
|
|
; So if you have tables of the squares of 0..511 you can lookup and subtract
|
|
; instead of multiplying.
|
|
|
|
; Table generation: I:0..511
|
|
; square1_lo = <((I*I)/4)
|
|
; square1_hi = >((I*I)/4)
|
|
; square2_lo = <(((I-255)*(I-255))/4)
|
|
; square2_hi = >(((I-255)*(I-255))/4)
|
|
|
|
|
|
.ifndef square1_lo
|
|
square1_lo = $2000
|
|
square1_hi = $2200
|
|
square2_lo = $2400
|
|
square2_hi = $2600
|
|
.endif
|
|
|
|
; for(i=0;i<512;i++) {
|
|
; square1_lo[i]=((i*i)/4)&0xff;
|
|
; square1_hi[i]=(((i*i)/4)>>8)&0xff;
|
|
; square2_lo[i]=( ((i-255)*(i-255))/4)&0xff;
|
|
; square2_hi[i]=(( ((i-255)*(i-255))/4)>>8)&0xff;
|
|
; }
|
|
|
|
init_multiply_tables:
|
|
|
|
; Build the add tables
|
|
|
|
ldx #$00
|
|
txa
|
|
.byte $c9 ; CMP #immediate - skip TYA and clear carry flag
|
|
lb1: tya
|
|
adc #$00 ; 0
|
|
ml1: sta square1_hi,x ; square1_hi[0]=0
|
|
tay ; y=0
|
|
cmp #$40 ; subtract 64 and update flags (c=0)
|
|
txa ; a=0
|
|
ror ; rotate
|
|
ml9: adc #$00 ; add 0
|
|
sta ml9+1 ; update add value
|
|
inx ; x=1
|
|
ml0: sta square1_lo,x ; square1_lo[0]=1
|
|
bne lb1 ; if not zero, loop
|
|
inc ml0+2 ; increment values
|
|
inc ml1+2 ; increment values
|
|
clc ; c=0
|
|
iny ; y=1
|
|
bne lb1 ; loop
|
|
|
|
; Build the subtract tables based on the existing one
|
|
|
|
ldx #$00
|
|
ldy #$ff
|
|
second_table:
|
|
lda square1_hi+1,x
|
|
sta square2_hi+$100,x
|
|
lda square1_hi,x
|
|
sta square2_hi,y
|
|
lda square1_lo+1,x
|
|
sta square2_lo+$100,x
|
|
lda square1_lo,x
|
|
sta square2_lo,y
|
|
dey
|
|
inx
|
|
bne second_table
|
|
|
|
|
|
rts
|
|
|