mirror of
https://github.com/irmen/prog8.git
synced 2024-11-22 15:33:02 +00:00
655 lines
17 KiB
Lua
655 lines
17 KiB
Lua
; Internal Math library routines - always included by the compiler
|
|
; note: some functions you might expect here are builtin functions,
|
|
; such as abs, sqrt, clamp, min, max for example.
|
|
|
|
math {
|
|
%option no_symbol_prefixing, ignore_unused
|
|
|
|
asmsub sin8u(ubyte angle @A) clobbers(Y) -> ubyte @A {
|
|
%asm {{
|
|
tay
|
|
lda _sinecos8u,y
|
|
rts
|
|
_sinecos8u .byte trunc(128.0 + 127.5 * sin(range(256+64) * rad(360.0/256.0)))
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub cos8u(ubyte angle @A) clobbers(Y) -> ubyte @A {
|
|
%asm {{
|
|
tay
|
|
lda sin8u._sinecos8u+64,y
|
|
rts
|
|
}}
|
|
}
|
|
|
|
asmsub sin8(ubyte angle @A) clobbers(Y) -> byte @A {
|
|
%asm {{
|
|
tay
|
|
lda _sinecos8,y
|
|
rts
|
|
_sinecos8 .char trunc(127.0 * sin(range(256+64) * rad(360.0/256.0)))
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub cos8(ubyte angle @A) clobbers(Y) -> byte @A {
|
|
%asm {{
|
|
tay
|
|
lda sin8._sinecos8+64,y
|
|
rts
|
|
}}
|
|
}
|
|
|
|
asmsub sinr8u(ubyte radians @A) clobbers(Y) -> ubyte @A {
|
|
%asm {{
|
|
tay
|
|
lda _sinecosR8u,y
|
|
rts
|
|
_sinecosR8u .byte trunc(128.0 + 127.5 * sin(range(180+45) * rad(360.0/180.0)))
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub cosr8u(ubyte radians @A) clobbers(Y) -> ubyte @A {
|
|
%asm {{
|
|
tay
|
|
lda sinr8u._sinecosR8u+45,y
|
|
rts
|
|
}}
|
|
}
|
|
|
|
asmsub sinr8(ubyte radians @A) clobbers(Y) -> byte @A {
|
|
%asm {{
|
|
tay
|
|
lda _sinecosR8,y
|
|
rts
|
|
_sinecosR8 .char trunc(127.0 * sin(range(180+45) * rad(360.0/180.0)))
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub cosr8(ubyte radians @A) clobbers(Y) -> byte @A {
|
|
%asm {{
|
|
tay
|
|
lda sinr8._sinecosR8+45,y
|
|
rts
|
|
}}
|
|
}
|
|
|
|
asmsub rnd() clobbers(Y) -> ubyte @A {
|
|
%asm {{
|
|
jmp prog8_math.randbyte
|
|
}}
|
|
}
|
|
|
|
asmsub rndw() -> uword @AY {
|
|
%asm {{
|
|
jmp prog8_math.randword
|
|
}}
|
|
}
|
|
|
|
sub randrange(ubyte n) -> ubyte {
|
|
; -- return random number uniformly distributed from 0 to n-1 (compensates for divisibility bias)
|
|
cx16.r0H = 255 / n * n
|
|
do {
|
|
cx16.r0L = math.rnd()
|
|
} until cx16.r0L < cx16.r0H
|
|
return cx16.r0L % n
|
|
}
|
|
|
|
sub randrangew(uword n) -> uword {
|
|
; -- return random number uniformly distributed from 0 to n-1 (compensates for divisibility bias)
|
|
cx16.r1 = 65535 / n * n
|
|
do {
|
|
cx16.r0 = math.rndw()
|
|
} until cx16.r0 < cx16.r1
|
|
return cx16.r0 % n
|
|
}
|
|
|
|
asmsub rndseed(uword seed1 @AY, uword seed2 @R0) clobbers(A,Y) {
|
|
; -- set new pseudo RNG's seed values. Defaults are: $00c2, $1137
|
|
%asm {{
|
|
sta prog8_math.randword.x1
|
|
sty prog8_math.randword.c1
|
|
lda cx16.r0L
|
|
sta prog8_math.randword.a1
|
|
lda cx16.r0H
|
|
sta prog8_math.randword.b1
|
|
rts
|
|
}}
|
|
}
|
|
|
|
asmsub log2(ubyte value @A) -> ubyte @Y {
|
|
%asm {{
|
|
sta P8ZP_SCRATCH_B1
|
|
lda #$80
|
|
ldy #7
|
|
- bit P8ZP_SCRATCH_B1
|
|
beq +
|
|
rts
|
|
+ dey
|
|
bne +
|
|
rts
|
|
+ lsr a
|
|
bne -
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub log2w(uword value @AY) -> ubyte @Y {
|
|
%asm {{
|
|
sta P8ZP_SCRATCH_W1
|
|
sty P8ZP_SCRATCH_W1+1
|
|
lda #<$8000
|
|
sta cx16.r0
|
|
lda #>$8000
|
|
sta cx16.r0+1
|
|
ldy #15
|
|
- lda P8ZP_SCRATCH_W1
|
|
and cx16.r0
|
|
sta P8ZP_SCRATCH_B1
|
|
lda P8ZP_SCRATCH_W1+1
|
|
and cx16.r0+1
|
|
ora P8ZP_SCRATCH_B1
|
|
beq +
|
|
rts
|
|
+ dey
|
|
bne +
|
|
rts
|
|
+ lsr cx16.r0+1
|
|
ror cx16.r0
|
|
jmp -
|
|
}}
|
|
}
|
|
|
|
asmsub mul16_last_upper() -> uword @AY {
|
|
; This routine peeks into the internal 32 bits multiplication result buffer of the
|
|
; 16*16 bits multiplication routine, to fetch the upper 16 bits of the last calculation.
|
|
; Notes:
|
|
; - to avoid interference it's best to fetch and store this value immediately after the multiplication expression.
|
|
; for instance, simply printing a number may already result in new multiplication calls being performed
|
|
; - not all multiplications in the source code result in an actual multiplication call:
|
|
; some simpler multiplications will be optimized away into faster routines. These will not set the upper 16 bits at all!
|
|
; - THE RESULT IS ONLY VALID IF THE MULTIPLICATION WAS DONE WITH UWORD ARGUMENTS (or two positive WORD arguments)
|
|
; as soon as a negative word value (or 2) was used in the multiplication, these upper 16 bits are not valid!!
|
|
; Suggestion (if you are on the Commander X16): use verafx.muls() to get a hardware accelerated 32 bit signed multplication.
|
|
%asm {{
|
|
lda multiply_words.result+2
|
|
ldy multiply_words.result+3
|
|
rts
|
|
}}
|
|
}
|
|
|
|
sub direction_sc(byte x1, byte y1, byte x2, byte y2) -> ubyte {
|
|
; From a pair of signed coordinates around the origin, calculate discrete direction between 0 and 23 into A.
|
|
cx16.r0L = 3 ; quadrant
|
|
cx16.r1sL = x2-x1 ; xdelta
|
|
if_neg {
|
|
cx16.r0L--
|
|
cx16.r1sL = -cx16.r1sL
|
|
}
|
|
cx16.r2sL = y2-y1 ; ydelta
|
|
if_neg {
|
|
cx16.r0L-=2
|
|
cx16.r2sL = -cx16.r2sL
|
|
}
|
|
return direction_qd(cx16.r0L, cx16.r1L, cx16.r2L)
|
|
}
|
|
|
|
sub direction(ubyte x1, ubyte y1, ubyte x2, ubyte y2) -> ubyte {
|
|
; From a pair of positive coordinates, calculate discrete direction between 0 and 23 into A.
|
|
cx16.r0L = 3 ; quadrant
|
|
if x2>=x1 {
|
|
cx16.r1L = x2-x1
|
|
} else {
|
|
cx16.r1L = x1-x2
|
|
cx16.r0L--
|
|
}
|
|
if y2>=y1 {
|
|
cx16.r2L = y2-y1
|
|
} else {
|
|
cx16.r2L = y1-y2
|
|
cx16.r0L -= 2
|
|
}
|
|
return direction_qd(cx16.r0L, cx16.r1L, cx16.r2L)
|
|
}
|
|
|
|
asmsub direction_qd(ubyte quadrant @A, ubyte xdelta @X, ubyte ydelta @Y) -> ubyte @A {
|
|
;Arctan https://github.com/dustmop/arctan24
|
|
; From a pair of X/Y deltas (both >=0), and quadrant 0-3, calculate discrete direction between 0 and 23 into A.
|
|
; .reg:a @in quadrant Number 0 to 3.
|
|
; .reg:x @in x_delta Delta for x direction.
|
|
; .reg:y @in y_delta Delta for y direction.
|
|
; Returns A as the direction (0-23).
|
|
|
|
%asm {{
|
|
x_delta = cx16.r0L
|
|
y_delta = cx16.r1L
|
|
quadrant = cx16.r2L
|
|
half_value = cx16.r3L
|
|
region_number = cx16.r4L
|
|
small = cx16.r5L
|
|
large = cx16.r5H
|
|
|
|
sta quadrant
|
|
sty y_delta
|
|
stx x_delta
|
|
cpx y_delta
|
|
bcs _XGreaterOrEqualY
|
|
|
|
_XLessY:
|
|
lda #16
|
|
sta region_number
|
|
stx small
|
|
sty large
|
|
bne _DetermineRegion
|
|
|
|
_XGreaterOrEqualY:
|
|
lda #0
|
|
sta region_number
|
|
stx large
|
|
sty small
|
|
|
|
_DetermineRegion:
|
|
; set A = small * 2.5
|
|
lda small
|
|
lsr a
|
|
sta half_value
|
|
lda small
|
|
asl a
|
|
bcs _SmallerQuotient
|
|
clc
|
|
adc half_value
|
|
bcs _SmallerQuotient
|
|
cmp large
|
|
bcc _LargerQuotient
|
|
|
|
; S * 2.5 > L
|
|
_SmallerQuotient:
|
|
; set A = S * 1.25
|
|
lsr half_value
|
|
lda small
|
|
clc
|
|
adc half_value
|
|
cmp large
|
|
bcc _Region1 ; if S * 1.25 < L then goto Region1 (L / S > 1.25)
|
|
bcs _Region0 ; (L / S < 1.25)
|
|
|
|
; S * 2.5 < L
|
|
_LargerQuotient:
|
|
; set A = S * 7.5
|
|
lda small
|
|
asl a
|
|
asl a
|
|
asl a
|
|
bcs _Region2
|
|
sec
|
|
sbc half_value
|
|
cmp large
|
|
bcc _Region3 ; if S * 7.5 < L then goto Region3 (L / S > 7.5)
|
|
jmp _Region2 ; (L / S < 7.5)
|
|
|
|
_Region0:
|
|
; L / S < 1.25. d=3,9,15,21
|
|
jmp _LookupResult
|
|
|
|
_Region1:
|
|
; 1.25 < L / S < 2.5. d=2,4,8,10,14,16,20,22
|
|
lda region_number
|
|
clc
|
|
adc #4
|
|
sta region_number
|
|
bpl _LookupResult
|
|
|
|
_Region2:
|
|
; 2.5 < L / S < 7.5. d=1,5,7,11,13,17,19,23
|
|
lda region_number
|
|
clc
|
|
adc #8
|
|
sta region_number
|
|
bpl _LookupResult
|
|
|
|
_Region3:
|
|
; 7.5 < L / S. d=0,6,12,18
|
|
lda region_number
|
|
clc
|
|
adc #12
|
|
sta region_number
|
|
|
|
_LookupResult:
|
|
lda quadrant
|
|
clc
|
|
adc region_number
|
|
tax
|
|
lda _quadrant_region_to_direction,x
|
|
rts
|
|
|
|
_quadrant_region_to_direction:
|
|
.byte 9, 3,15,21
|
|
.byte 10, 2,14,22
|
|
.byte 11, 1,13,23
|
|
.byte 12, 0,12, 0
|
|
.byte 9, 3,15,21
|
|
.byte 8, 4,16,20
|
|
.byte 7, 5,17,19
|
|
.byte 6, 6,18,18
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub atan2(ubyte x1 @R0, ubyte y1 @R1, ubyte x2 @R2, ubyte y2 @R3) -> ubyte @A {
|
|
;; Calculate the angle, in a 256-degree circle, between two points into A.
|
|
;; The points (x1, y1) and (x2, y2) have to use *unsigned coordinates only* from the positive quadrant in the carthesian plane!
|
|
;; https://www.codebase64.org/doku.php?id=base:8bit_atan2_8-bit_angle
|
|
;; This uses 2 large lookup tables so uses a lot of memory but is super fast.
|
|
|
|
%asm {{
|
|
|
|
x1 = cx16.r0L
|
|
y1 = cx16.r1L
|
|
x2 = cx16.r2L
|
|
y2 = cx16.r3L
|
|
octant = cx16.r4L ;; temporary zeropage variable
|
|
|
|
lda x1
|
|
sec
|
|
sbc x2
|
|
bcs *+4
|
|
eor #$ff
|
|
tax
|
|
rol octant
|
|
|
|
lda y1
|
|
sec
|
|
sbc y2
|
|
bcs *+4
|
|
eor #$ff
|
|
tay
|
|
rol octant
|
|
|
|
lda log2_tab,x
|
|
sec
|
|
sbc log2_tab,y
|
|
bcc *+4
|
|
eor #$ff
|
|
tax
|
|
|
|
lda octant
|
|
rol a
|
|
and #%111
|
|
tay
|
|
|
|
lda atan_tab,x
|
|
eor octant_adjust,y
|
|
rts
|
|
|
|
octant_adjust
|
|
.byte %00111111 ;; x+,y+,|x|>|y|
|
|
.byte %00000000 ;; x+,y+,|x|<|y|
|
|
.byte %11000000 ;; x+,y-,|x|>|y|
|
|
.byte %11111111 ;; x+,y-,|x|<|y|
|
|
.byte %01000000 ;; x-,y+,|x|>|y|
|
|
.byte %01111111 ;; x-,y+,|x|<|y|
|
|
.byte %10111111 ;; x-,y-,|x|>|y|
|
|
.byte %10000000 ;; x-,y-,|x|<|y|
|
|
|
|
|
|
;;;;;;;; atan(2^(x/32))*128/pi ;;;;;;;;
|
|
|
|
atan_tab
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$00,$00,$00
|
|
.byte $00,$00,$00,$00,$00,$01,$01,$01
|
|
.byte $01,$01,$01,$01,$01,$01,$01,$01
|
|
.byte $01,$01,$01,$01,$01,$01,$01,$01
|
|
.byte $01,$01,$01,$01,$01,$01,$01,$01
|
|
.byte $01,$01,$01,$01,$01,$02,$02,$02
|
|
.byte $02,$02,$02,$02,$02,$02,$02,$02
|
|
.byte $02,$02,$02,$02,$02,$02,$02,$02
|
|
.byte $03,$03,$03,$03,$03,$03,$03,$03
|
|
.byte $03,$03,$03,$03,$03,$04,$04,$04
|
|
.byte $04,$04,$04,$04,$04,$04,$04,$04
|
|
.byte $05,$05,$05,$05,$05,$05,$05,$05
|
|
.byte $06,$06,$06,$06,$06,$06,$06,$06
|
|
.byte $07,$07,$07,$07,$07,$07,$08,$08
|
|
.byte $08,$08,$08,$08,$09,$09,$09,$09
|
|
.byte $09,$0a,$0a,$0a,$0a,$0b,$0b,$0b
|
|
.byte $0b,$0c,$0c,$0c,$0c,$0d,$0d,$0d
|
|
.byte $0d,$0e,$0e,$0e,$0e,$0f,$0f,$0f
|
|
.byte $10,$10,$10,$11,$11,$11,$12,$12
|
|
.byte $12,$13,$13,$13,$14,$14,$15,$15
|
|
.byte $15,$16,$16,$17,$17,$17,$18,$18
|
|
.byte $19,$19,$19,$1a,$1a,$1b,$1b,$1c
|
|
.byte $1c,$1c,$1d,$1d,$1e,$1e,$1f,$1f
|
|
|
|
|
|
;;;;;;;; log2(x)*32 ;;;;;;;;
|
|
|
|
log2_tab
|
|
.byte $00,$00,$20,$32,$40,$4a,$52,$59
|
|
.byte $60,$65,$6a,$6e,$72,$76,$79,$7d
|
|
.byte $80,$82,$85,$87,$8a,$8c,$8e,$90
|
|
.byte $92,$94,$96,$98,$99,$9b,$9d,$9e
|
|
.byte $a0,$a1,$a2,$a4,$a5,$a6,$a7,$a9
|
|
.byte $aa,$ab,$ac,$ad,$ae,$af,$b0,$b1
|
|
.byte $b2,$b3,$b4,$b5,$b6,$b7,$b8,$b9
|
|
.byte $b9,$ba,$bb,$bc,$bd,$bd,$be,$bf
|
|
.byte $c0,$c0,$c1,$c2,$c2,$c3,$c4,$c4
|
|
.byte $c5,$c6,$c6,$c7,$c7,$c8,$c9,$c9
|
|
.byte $ca,$ca,$cb,$cc,$cc,$cd,$cd,$ce
|
|
.byte $ce,$cf,$cf,$d0,$d0,$d1,$d1,$d2
|
|
.byte $d2,$d3,$d3,$d4,$d4,$d5,$d5,$d5
|
|
.byte $d6,$d6,$d7,$d7,$d8,$d8,$d9,$d9
|
|
.byte $d9,$da,$da,$db,$db,$db,$dc,$dc
|
|
.byte $dd,$dd,$dd,$de,$de,$de,$df,$df
|
|
.byte $df,$e0,$e0,$e1,$e1,$e1,$e2,$e2
|
|
.byte $e2,$e3,$e3,$e3,$e4,$e4,$e4,$e5
|
|
.byte $e5,$e5,$e6,$e6,$e6,$e7,$e7,$e7
|
|
.byte $e7,$e8,$e8,$e8,$e9,$e9,$e9,$ea
|
|
.byte $ea,$ea,$ea,$eb,$eb,$eb,$ec,$ec
|
|
.byte $ec,$ec,$ed,$ed,$ed,$ed,$ee,$ee
|
|
.byte $ee,$ee,$ef,$ef,$ef,$ef,$f0,$f0
|
|
.byte $f0,$f1,$f1,$f1,$f1,$f1,$f2,$f2
|
|
.byte $f2,$f2,$f3,$f3,$f3,$f3,$f4,$f4
|
|
.byte $f4,$f4,$f5,$f5,$f5,$f5,$f5,$f6
|
|
.byte $f6,$f6,$f6,$f7,$f7,$f7,$f7,$f7
|
|
.byte $f8,$f8,$f8,$f8,$f9,$f9,$f9,$f9
|
|
.byte $f9,$fa,$fa,$fa,$fa,$fa,$fb,$fb
|
|
.byte $fb,$fb,$fb,$fc,$fc,$fc,$fc,$fc
|
|
.byte $fd,$fd,$fd,$fd,$fd,$fd,$fe,$fe
|
|
.byte $fe,$fe,$fe,$ff,$ff,$ff,$ff,$ff
|
|
; !notreached!
|
|
}}
|
|
}
|
|
|
|
asmsub diff(ubyte v1 @A, ubyte v2 @Y) -> ubyte @A {
|
|
; -- returns the (absolute) difference, or distance, between the two bytes
|
|
%asm {{
|
|
sty P8ZP_SCRATCH_REG
|
|
sec
|
|
sbc P8ZP_SCRATCH_REG
|
|
bcs +
|
|
eor #255
|
|
inc a
|
|
+ rts
|
|
}}
|
|
}
|
|
|
|
asmsub diffw(uword w1 @R0, uword w2 @AY) -> uword @AY {
|
|
; -- returns the (absolute) difference, or distance, between the two words
|
|
%asm {{
|
|
sec
|
|
sbc cx16.r0L
|
|
sta cx16.r0L
|
|
tya
|
|
sbc cx16.r0H
|
|
sta cx16.r0H
|
|
bcs +
|
|
eor #255
|
|
sta cx16.r0H
|
|
lda cx16.r0L
|
|
eor #255
|
|
inc a
|
|
sta cx16.r0L
|
|
bne +
|
|
inc cx16.r0H
|
|
+ lda cx16.r0L
|
|
ldy cx16.r0H
|
|
rts
|
|
}}
|
|
}
|
|
|
|
sub crc16(uword data, uword length) -> uword {
|
|
; calculates the CRC16 (XMODEM) checksum of the buffer.
|
|
; There are also "streaming" crc16_start/update/end routines below, that allow you to calculate crc16 for data that doesn't fit in a single memory block.
|
|
crc16_start()
|
|
cx16.r13 = data
|
|
cx16.r14 = data+length
|
|
while cx16.r13!=cx16.r14 {
|
|
crc16_update(@(cx16.r13))
|
|
cx16.r13++
|
|
}
|
|
return crc16_end()
|
|
}
|
|
|
|
sub crc16_start() {
|
|
; start the "streaming" crc16
|
|
; note: tracks the crc16 checksum in cx16.r15!
|
|
; if your code uses that, it must save/restore it before calling this routine
|
|
cx16.r15 = 0
|
|
}
|
|
|
|
asmsub crc16_update(ubyte value @A) {
|
|
; update the "streaming" crc16 with next byte value
|
|
; note: tracks the crc16 checksum in cx16.r15!
|
|
; if your code uses that, it must save/restore it before calling this routine
|
|
%asm {{
|
|
eor cx16.r15H
|
|
sta cx16.r15H
|
|
ldy #8
|
|
- lda cx16.r15H
|
|
asl cx16.r15L
|
|
rol cx16.r15H
|
|
and #$80
|
|
beq +
|
|
lda cx16.r15H
|
|
eor #$10
|
|
sta cx16.r15H
|
|
lda cx16.r15L
|
|
eor #$21
|
|
sta cx16.r15L
|
|
+ dey
|
|
bne -
|
|
rts
|
|
}}
|
|
; orignal prog8 code was:
|
|
; cx16.r15H ^= value
|
|
; repeat 8 {
|
|
; if cx16.r15H & $80 !=0 {
|
|
; cx16.r15 <<=1
|
|
; cx16.r15 ^= $1021
|
|
; } else
|
|
; cx16.r15<<=1
|
|
; }
|
|
}
|
|
|
|
sub crc16_end() -> uword {
|
|
; finalize the "streaming" crc16, returns resulting crc16 value
|
|
return cx16.r15
|
|
}
|
|
|
|
sub crc32(uword data, uword length) {
|
|
; Calculates the CRC-32 (POSIX) checksum of the buffer.
|
|
; because prog8 doesn't have 32 bits integers, we have to split up the calculation over 2 words.
|
|
; result stored in cx16.r14 (low word) and cx16.r15 (high word)
|
|
; There are also "streaming" crc32_start/update/end routines below, that allow you to calculate crc32 for data that doesn't fit in a single memory block.
|
|
crc32_start()
|
|
cx16.r12 = data
|
|
cx16.r13 = data+length
|
|
while cx16.r12!=cx16.r13 {
|
|
crc32_update(@(cx16.r12))
|
|
cx16.r12++
|
|
}
|
|
crc32_end()
|
|
}
|
|
|
|
sub crc32_start() {
|
|
; start the "streaming" crc32
|
|
; note: tracks the crc32 checksum in cx16.r14 and cx16.r15!
|
|
; if your code uses these, it must save/restore them before calling this routine
|
|
cx16.r14 = cx16.r15 = 0
|
|
}
|
|
|
|
asmsub crc32_update(ubyte value @A) {
|
|
; update the "streaming" crc32 with next byte value
|
|
; note: tracks the crc32 checksum in cx16.r14 and cx16.r15!
|
|
; if your code uses these, it must save/restore them before calling this routine
|
|
%asm {{
|
|
eor cx16.r15H
|
|
sta cx16.r15H
|
|
ldy #8
|
|
- lda cx16.r15H
|
|
asl cx16.r14L
|
|
rol cx16.r14H
|
|
rol cx16.r15L
|
|
rol cx16.r15H
|
|
and #$80
|
|
beq +
|
|
lda cx16.r15H
|
|
eor #$04
|
|
sta cx16.r15H
|
|
lda cx16.r15L
|
|
eor #$c1
|
|
sta cx16.r15L
|
|
lda cx16.r14H
|
|
eor #$1d
|
|
sta cx16.r14H
|
|
lda cx16.r14L
|
|
eor #$b7
|
|
sta cx16.r14L
|
|
+ dey
|
|
bne -
|
|
rts
|
|
}}
|
|
; original prog8 code:
|
|
; cx16.r15H ^= value
|
|
; repeat 8 {
|
|
; if cx16.r15H & $80 !=0 {
|
|
; cx16.r14 <<= 1
|
|
; rol(cx16.r15)
|
|
; cx16.r15 ^= $04c1
|
|
; cx16.r14 ^= $1db7
|
|
; }
|
|
; else {
|
|
; cx16.r14 <<= 1
|
|
; rol(cx16.r15)
|
|
; }
|
|
; }
|
|
|
|
}
|
|
|
|
sub crc32_end() {
|
|
; finalize the "streaming" crc32
|
|
; result stored in cx16.r14 (low word) and cx16.r15 (high word)
|
|
cx16.r15 ^= $ffff
|
|
cx16.r14 ^= $ffff
|
|
}
|
|
|
|
|
|
sub lerp(ubyte v0, ubyte v1, ubyte t) -> ubyte {
|
|
; Linear interpolation (LERP)
|
|
; returns an interpolation between two inputs (v0, v1) for a parameter t in the interval [0, 255]
|
|
; guarantees v = v1 when t = 255
|
|
return v0 + msb(t as uword * (v1 - v0) + 255)
|
|
}
|
|
}
|