prog8/compiler/res/prog8lib/math.p8
Irmen de Jong 9438e996d7 Fixed math.mul16_last_upper().
Added math.lerpw() a LERP routine for words (to complement the existing math.lerp() for bytes)
Described the LERP routines in the library chapter in the docs.
2024-11-12 18:31:24 +01:00

666 lines
18 KiB
Lua

; Internal Math library routines - always included by the compiler
; note: some functions you might expect here are builtin functions,
; such as abs, sqrt, clamp, min, max for example.
math {
%option no_symbol_prefixing, ignore_unused
asmsub sin8u(ubyte angle @A) clobbers(Y) -> ubyte @A {
%asm {{
tay
lda _sinecos8u,y
rts
_sinecos8u .byte trunc(128.0 + 127.5 * sin(range(256+64) * rad(360.0/256.0)))
; !notreached!
}}
}
asmsub cos8u(ubyte angle @A) clobbers(Y) -> ubyte @A {
%asm {{
tay
lda sin8u._sinecos8u+64,y
rts
}}
}
asmsub sin8(ubyte angle @A) clobbers(Y) -> byte @A {
%asm {{
tay
lda _sinecos8,y
rts
_sinecos8 .char trunc(127.0 * sin(range(256+64) * rad(360.0/256.0)))
; !notreached!
}}
}
asmsub cos8(ubyte angle @A) clobbers(Y) -> byte @A {
%asm {{
tay
lda sin8._sinecos8+64,y
rts
}}
}
asmsub sinr8u(ubyte radians @A) clobbers(Y) -> ubyte @A {
%asm {{
tay
lda _sinecosR8u,y
rts
_sinecosR8u .byte trunc(128.0 + 127.5 * sin(range(180+45) * rad(360.0/180.0)))
; !notreached!
}}
}
asmsub cosr8u(ubyte radians @A) clobbers(Y) -> ubyte @A {
%asm {{
tay
lda sinr8u._sinecosR8u+45,y
rts
}}
}
asmsub sinr8(ubyte radians @A) clobbers(Y) -> byte @A {
%asm {{
tay
lda _sinecosR8,y
rts
_sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
; !notreached!
}}
}
asmsub cosr8(ubyte radians @A) clobbers(Y) -> byte @A {
%asm {{
tay
lda sinr8._sinecosR8+45,y
rts
}}
}
asmsub rnd() clobbers(Y) -> ubyte @A {
%asm {{
jmp prog8_math.randbyte
}}
}
asmsub rndw() -> uword @AY {
%asm {{
jmp prog8_math.randword
}}
}
sub randrange(ubyte n) -> ubyte {
; -- return random number uniformly distributed from 0 to n-1 (compensates for divisibility bias)
cx16.r0H = 255 / n * n
do {
cx16.r0L = math.rnd()
} until cx16.r0L < cx16.r0H
return cx16.r0L % n
}
sub randrangew(uword n) -> uword {
; -- return random number uniformly distributed from 0 to n-1 (compensates for divisibility bias)
cx16.r1 = 65535 / n * n
do {
cx16.r0 = math.rndw()
} until cx16.r0 < cx16.r1
return cx16.r0 % n
}
asmsub rndseed(uword seed1 @AY, uword seed2 @R0) clobbers(A,Y) {
; -- set new pseudo RNG's seed values. Defaults are: $00c2, $1137
%asm {{
sta prog8_math.randword.x1
sty prog8_math.randword.c1
lda cx16.r0L
sta prog8_math.randword.a1
lda cx16.r0H
sta prog8_math.randword.b1
rts
}}
}
asmsub log2(ubyte value @A) -> ubyte @Y {
%asm {{
sta P8ZP_SCRATCH_B1
lda #$80
ldy #7
- bit P8ZP_SCRATCH_B1
beq +
rts
+ dey
bne +
rts
+ lsr a
bne -
; !notreached!
}}
}
asmsub log2w(uword value @AY) -> ubyte @Y {
%asm {{
sta P8ZP_SCRATCH_W1
sty P8ZP_SCRATCH_W1+1
lda #<$8000
sta cx16.r0
lda #>$8000
sta cx16.r0+1
ldy #15
- lda P8ZP_SCRATCH_W1
and cx16.r0
sta P8ZP_SCRATCH_B1
lda P8ZP_SCRATCH_W1+1
and cx16.r0+1
ora P8ZP_SCRATCH_B1
beq +
rts
+ dey
bne +
rts
+ lsr cx16.r0+1
ror cx16.r0
jmp -
}}
}
asmsub mul16_last_upper() -> uword @AY {
; This routine peeks into the internal 32 bits multiplication result buffer of the
; 16*16 bits multiplication routine, to fetch the upper 16 bits of the last calculation.
; Notes:
; - to avoid interference it's best to fetch and store this value immediately after the multiplication expression.
; for instance, simply printing a number may already result in new multiplication calls being performed
; - not all multiplications in the source code result in an actual multiplication call:
; some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all!
; - THE RESULT IS ONLY VALID IF THE MULTIPLICATION WAS DONE WITH UWORD ARGUMENTS (or two positive WORD arguments)
; as soon as a negative word value (or 2) was used in the multiplication, these upper 16 bits are not valid!!
; Suggestion (if you are on the Commander X16): use verafx.muls() to get a hardware accelerated 32 bit signed multplication.
%asm {{
lda prog8_math.multiply_words.result+2
ldy prog8_math.multiply_words.result+3
rts
}}
}
sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
cx16.r0L = 3 ; quadrant
cx16.r1sL = x2-x1 ; xdelta
if_neg {
cx16.r0L--
cx16.r1sL = -cx16.r1sL
}
cx16.r2sL = y2-y1 ; ydelta
if_neg {
cx16.r0L-=2
cx16.r2sL = -cx16.r2sL
}
return direction_qd(cx16.r0L, cx16.r1L, cx16.r2L)
}
sub direction(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
; From a pair of positive coordinates, calculate discrete direction between 0 and 23 into A.
cx16.r0L = 3 ; quadrant
if x2>=x1 {
cx16.r1L = x2-x1
} else {
cx16.r1L = x1-x2
cx16.r0L--
}
if y2>=y1 {
cx16.r2L = y2-y1
} else {
cx16.r2L = y1-y2
cx16.r0L -= 2
}
return direction_qd(cx16.r0L, cx16.r1L, cx16.r2L)
}
asmsub direction_qd(ubyte quadrant @A, ubyte xdelta @X, ubyte ydelta @Y) -> ubyte @A {
;Arctan https://github.com/dustmop/arctan24
; From a pair of X/Y deltas (both >=0), and quadrant 0-3, calculate discrete direction between 0 and 23 into A.
; .reg:a @in quadrant Number 0 to 3.
; .reg:x @in x_delta Delta for x direction.
; .reg:y @in y_delta Delta for y direction.
; Returns A as the direction (0-23).
%asm {{
x_delta = cx16.r0L
y_delta = cx16.r1L
quadrant = cx16.r2L
half_value = cx16.r3L
region_number = cx16.r4L
small = cx16.r5L
large = cx16.r5H
sta quadrant
sty y_delta
stx x_delta
cpx y_delta
bcs _XGreaterOrEqualY
_XLessY:
lda #16
sta region_number
stx small
sty large
bne _DetermineRegion
_XGreaterOrEqualY:
lda #0
sta region_number
stx large
sty small
_DetermineRegion:
; set A = small * 2.5
lda small
lsr a
sta half_value
lda small
asl a
bcs _SmallerQuotient
clc
adc half_value
bcs _SmallerQuotient
cmp large
bcc _LargerQuotient
; S * 2.5 > L
_SmallerQuotient:
; set A = S * 1.25
lsr half_value
lda small
clc
adc half_value
cmp large
bcc _Region1 ; if S * 1.25 < L then goto Region1 (L / S > 1.25)
bcs _Region0 ; (L / S < 1.25)
; S * 2.5 < L
_LargerQuotient:
; set A = S * 7.5
lda small
asl a
asl a
asl a
bcs _Region2
sec
sbc half_value
cmp large
bcc _Region3 ; if S * 7.5 < L then goto Region3 (L / S > 7.5)
jmp _Region2 ; (L / S < 7.5)
_Region0:
; L / S < 1.25. d=3,9,15,21
jmp _LookupResult
_Region1:
; 1.25 < L / S < 2.5. d=2,4,8,10,14,16,20,22
lda region_number
clc
adc #4
sta region_number
bpl _LookupResult
_Region2:
; 2.5 < L / S < 7.5. d=1,5,7,11,13,17,19,23
lda region_number
clc
adc #8
sta region_number
bpl _LookupResult
_Region3:
; 7.5 < L / S. d=0,6,12,18
lda region_number
clc
adc #12
sta region_number
_LookupResult:
lda quadrant
clc
adc region_number
tax
lda _quadrant_region_to_direction,x
rts
_quadrant_region_to_direction:
.byte 9, 3,15,21
.byte 10, 2,14,22
.byte 11, 1,13,23
.byte 12, 0,12, 0
.byte 9, 3,15,21
.byte 8, 4,16,20
.byte 7, 5,17,19
.byte 6, 6,18,18
; !notreached!
}}
}
asmsub atan2(ubyte x1 @R0, ubyte y1 @R1, ubyte x2 @R2, ubyte y2 @R3) -> ubyte @A {
;; Calculate the angle, in a 256-degree circle, between two points into A.
;; The points (x1, y1) and (x2, y2) have to use *unsigned coordinates only* from the positive quadrant in the carthesian plane!
;; https://www.codebase64.org/doku.php?id=base:8bit_atan2_8-bit_angle
;; This uses 2 large lookup tables so uses a lot of memory but is super fast.
%asm {{
x1 = cx16.r0L
y1 = cx16.r1L
x2 = cx16.r2L
y2 = cx16.r3L
octant = cx16.r4L ;; temporary zeropage variable
lda x1
sec
sbc x2
bcs *+4
eor #$ff
tax
rol octant
lda y1
sec
sbc y2
bcs *+4
eor #$ff
tay
rol octant
lda log2_tab,x
sec
sbc log2_tab,y
bcc *+4
eor #$ff
tax
lda octant
rol a
and #%111
tay
lda atan_tab,x
eor octant_adjust,y
rts
octant_adjust
.byte %00111111 ;; x+,y+,|x|>|y|
.byte %00000000 ;; x+,y+,|x|<|y|
.byte %11000000 ;; x+,y-,|x|>|y|
.byte %11111111 ;; x+,y-,|x|<|y|
.byte %01000000 ;; x-,y+,|x|>|y|
.byte %01111111 ;; x-,y+,|x|<|y|
.byte %10111111 ;; x-,y-,|x|>|y|
.byte %10000000 ;; x-,y-,|x|<|y|
;;;;;;;; atan(2^(x/32))*128/pi ;;;;;;;;
atan_tab
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$00,$00,$00
.byte $00,$00,$00,$00,$00,$01,$01,$01
.byte $01,$01,$01,$01,$01,$01,$01,$01
.byte $01,$01,$01,$01,$01,$01,$01,$01
.byte $01,$01,$01,$01,$01,$01,$01,$01
.byte $01,$01,$01,$01,$01,$02,$02,$02
.byte $02,$02,$02,$02,$02,$02,$02,$02
.byte $02,$02,$02,$02,$02,$02,$02,$02
.byte $03,$03,$03,$03,$03,$03,$03,$03
.byte $03,$03,$03,$03,$03,$04,$04,$04
.byte $04,$04,$04,$04,$04,$04,$04,$04
.byte $05,$05,$05,$05,$05,$05,$05,$05
.byte $06,$06,$06,$06,$06,$06,$06,$06
.byte $07,$07,$07,$07,$07,$07,$08,$08
.byte $08,$08,$08,$08,$09,$09,$09,$09
.byte $09,$0a,$0a,$0a,$0a,$0b,$0b,$0b
.byte $0b,$0c,$0c,$0c,$0c,$0d,$0d,$0d
.byte $0d,$0e,$0e,$0e,$0e,$0f,$0f,$0f
.byte $10,$10,$10,$11,$11,$11,$12,$12
.byte $12,$13,$13,$13,$14,$14,$15,$15
.byte $15,$16,$16,$17,$17,$17,$18,$18
.byte $19,$19,$19,$1a,$1a,$1b,$1b,$1c
.byte $1c,$1c,$1d,$1d,$1e,$1e,$1f,$1f
;;;;;;;; log2(x)*32 ;;;;;;;;
log2_tab
.byte $00,$00,$20,$32,$40,$4a,$52,$59
.byte $60,$65,$6a,$6e,$72,$76,$79,$7d
.byte $80,$82,$85,$87,$8a,$8c,$8e,$90
.byte $92,$94,$96,$98,$99,$9b,$9d,$9e
.byte $a0,$a1,$a2,$a4,$a5,$a6,$a7,$a9
.byte $aa,$ab,$ac,$ad,$ae,$af,$b0,$b1
.byte $b2,$b3,$b4,$b5,$b6,$b7,$b8,$b9
.byte $b9,$ba,$bb,$bc,$bd,$bd,$be,$bf
.byte $c0,$c0,$c1,$c2,$c2,$c3,$c4,$c4
.byte $c5,$c6,$c6,$c7,$c7,$c8,$c9,$c9
.byte $ca,$ca,$cb,$cc,$cc,$cd,$cd,$ce
.byte $ce,$cf,$cf,$d0,$d0,$d1,$d1,$d2
.byte $d2,$d3,$d3,$d4,$d4,$d5,$d5,$d5
.byte $d6,$d6,$d7,$d7,$d8,$d8,$d9,$d9
.byte $d9,$da,$da,$db,$db,$db,$dc,$dc
.byte $dd,$dd,$dd,$de,$de,$de,$df,$df
.byte $df,$e0,$e0,$e1,$e1,$e1,$e2,$e2
.byte $e2,$e3,$e3,$e3,$e4,$e4,$e4,$e5
.byte $e5,$e5,$e6,$e6,$e6,$e7,$e7,$e7
.byte $e7,$e8,$e8,$e8,$e9,$e9,$e9,$ea
.byte $ea,$ea,$ea,$eb,$eb,$eb,$ec,$ec
.byte $ec,$ec,$ed,$ed,$ed,$ed,$ee,$ee
.byte $ee,$ee,$ef,$ef,$ef,$ef,$f0,$f0
.byte $f0,$f1,$f1,$f1,$f1,$f1,$f2,$f2
.byte $f2,$f2,$f3,$f3,$f3,$f3,$f4,$f4
.byte $f4,$f4,$f5,$f5,$f5,$f5,$f5,$f6
.byte $f6,$f6,$f6,$f7,$f7,$f7,$f7,$f7
.byte $f8,$f8,$f8,$f8,$f9,$f9,$f9,$f9
.byte $f9,$fa,$fa,$fa,$fa,$fa,$fb,$fb
.byte $fb,$fb,$fb,$fc,$fc,$fc,$fc,$fc
.byte $fd,$fd,$fd,$fd,$fd,$fd,$fe,$fe
.byte $fe,$fe,$fe,$ff,$ff,$ff,$ff,$ff
; !notreached!
}}
}
asmsub diff(ubyte v1 @A, ubyte v2 @Y) -> ubyte @A {
; -- returns the (absolute) difference, or distance, between the two bytes
%asm {{
sty P8ZP_SCRATCH_REG
sec
sbc P8ZP_SCRATCH_REG
bcs +
eor #255
inc a
+ rts
}}
}
asmsub diffw(uword w1 @R0, uword w2 @AY) -> uword @AY {
; -- returns the (absolute) difference, or distance, between the two words
%asm {{
sec
sbc cx16.r0L
sta cx16.r0L
tya
sbc cx16.r0H
sta cx16.r0H
bcs +
eor #255
sta cx16.r0H
lda cx16.r0L
eor #255
inc a
sta cx16.r0L
bne +
inc cx16.r0H
+ lda cx16.r0L
ldy cx16.r0H
rts
}}
}
sub crc16(uword data, uword length) -> uword {
; calculates the CRC16 (XMODEM) checksum of the buffer.
; There are also "streaming" crc16_start/update/end routines below, that allow you to calculate crc16 for data that doesn't fit in a single memory block.
crc16_start()
cx16.r13 = data
cx16.r14 = data+length
while cx16.r13!=cx16.r14 {
crc16_update(@(cx16.r13))
cx16.r13++
}
return crc16_end()
}
sub crc16_start() {
; start the "streaming" crc16
; note: tracks the crc16 checksum in cx16.r15!
; if your code uses that, it must save/restore it before calling this routine
cx16.r15 = 0
}
asmsub crc16_update(ubyte value @A) {
; update the "streaming" crc16 with next byte value
; note: tracks the crc16 checksum in cx16.r15!
; if your code uses that, it must save/restore it before calling this routine
%asm {{
eor cx16.r15H
sta cx16.r15H
ldy #8
- lda cx16.r15H
asl cx16.r15L
rol cx16.r15H
and #$80
beq +
lda cx16.r15H
eor #$10
sta cx16.r15H
lda cx16.r15L
eor #$21
sta cx16.r15L
+ dey
bne -
rts
}}
; orignal prog8 code was:
; cx16.r15H ^= value
; repeat 8 {
; if cx16.r15H & $80 !=0 {
; cx16.r15 <<=1
; cx16.r15 ^= $1021
; } else
; cx16.r15<<=1
; }
}
sub crc16_end() -> uword {
; finalize the "streaming" crc16, returns resulting crc16 value
return cx16.r15
}
sub crc32(uword data, uword length) {
; Calculates the CRC-32 (POSIX) checksum of the buffer.
; because prog8 doesn't have 32 bits integers, we have to split up the calculation over 2 words.
; result stored in cx16.r14 (low word) and cx16.r15 (high word)
; There are also "streaming" crc32_start/update/end routines below, that allow you to calculate crc32 for data that doesn't fit in a single memory block.
crc32_start()
cx16.r12 = data
cx16.r13 = data+length
while cx16.r12!=cx16.r13 {
crc32_update(@(cx16.r12))
cx16.r12++
}
crc32_end()
}
sub crc32_start() {
; start the "streaming" crc32
; note: tracks the crc32 checksum in cx16.r14 and cx16.r15!
; if your code uses these, it must save/restore them before calling this routine
cx16.r14 = cx16.r15 = 0
}
asmsub crc32_update(ubyte value @A) {
; update the "streaming" crc32 with next byte value
; note: tracks the crc32 checksum in cx16.r14 and cx16.r15!
; if your code uses these, it must save/restore them before calling this routine
%asm {{
eor cx16.r15H
sta cx16.r15H
ldy #8
- lda cx16.r15H
asl cx16.r14L
rol cx16.r14H
rol cx16.r15L
rol cx16.r15H
and #$80
beq +
lda cx16.r15H
eor #$04
sta cx16.r15H
lda cx16.r15L
eor #$c1
sta cx16.r15L
lda cx16.r14H
eor #$1d
sta cx16.r14H
lda cx16.r14L
eor #$b7
sta cx16.r14L
+ dey
bne -
rts
}}
; original prog8 code:
; cx16.r15H ^= value
; repeat 8 {
; if cx16.r15H & $80 !=0 {
; cx16.r14 <<= 1
; rol(cx16.r15)
; cx16.r15 ^= $04c1
; cx16.r14 ^= $1db7
; }
; else {
; cx16.r14 <<= 1
; rol(cx16.r15)
; }
; }
}
sub crc32_end() {
; finalize the "streaming" crc32
; result stored in cx16.r14 (low word) and cx16.r15 (high word)
cx16.r15 ^= $ffff
cx16.r14 ^= $ffff
}
sub lerp(ubyte v0, ubyte v1, ubyte t) -> ubyte {
; Linear interpolation (LERP)
; returns an interpolation between two inputs (v0, v1) for a parameter t in the interval [0, 255]
; guarantees v = v1 when t = 255
return v0 + msb(t as uword * (v1 - v0) + 255)
}
sub lerpw(uword v0, uword v1, uword t) -> uword {
; Linear interpolation (LERP) on word values
; returns an interpolation between two inputs (v0, v1) for a parameter t in the interval [0, 65535]
; guarantees v = v1 when t = 65535
t *= v1-v0
cx16.r0 = math.mul16_last_upper()
if t!=0
cx16.r0++
return v0 + cx16.r0
}
}