2023-12-01 00:29:50 +00:00
|
|
|
* Copyright (c) 2017,2023 Stephen Heumann
|
|
|
|
*
|
2024-06-27 02:46:58 +00:00
|
|
|
* Permission to use, copy, modify, and/or distribute this software for any
|
2023-12-01 00:29:50 +00:00
|
|
|
* purpose with or without fee is hereby granted, provided that the above
|
|
|
|
* copyright notice and this permission notice appear in all copies.
|
|
|
|
*
|
|
|
|
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
|
|
|
|
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
|
|
|
|
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
|
|
|
|
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
|
|
|
|
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
|
|
|
|
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
|
|
|
|
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
|
|
|
|
|
|
|
|
* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
*
* &loc  address of the low word; the high word is at &loc+2
* &n    rotate count, evaluated at assembly time
*
* A count above 16 is emitted as a left rotation by 32-&n (ROTL4).
* Otherwise one lsr/ror/ror group is emitted per bit.  A holds a copy
* of the high word; shifting it right puts the next bit to wrap around
* into the carry, which is why a single expansion handles at most 16
* bits.  A count below 1 emits no code (the loop used to be generated
* at least once, so a count of 0 rotated by one bit).
* Uses A as scratch.
	macro
	ROTR4	&loc,&n
	aif	&n>16,.dorotl
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotrgo
	mexit
.rotrgo
	lda	&loc+2
.rotrloop
	lsr	a	;to set carry
	ror	&loc
	ror	&loc+2
&i	seta	&i-1
	aif	&i>0,.rotrloop
	ago	.end
.dorotl
	ROTL4	&loc,32-&n
.end
	mend
|
|
|
|
|
|
|
|
* Right-rotate 32-bit value in &loc by &n positions, like ROTR4, but
* without the initial load:
* Expects the contents of &loc+2 to be loaded in A already
*
* &loc  address of the low word; the high word is at &loc+2
* &n    rotate count, evaluated at assembly time
*
* A count above 16 is emitted as a left rotation by 32-&n through
* ROTL4, which loads A itself.  A count below 1 emits no code; callers
* such as ROTL4AUTO compute the count as a difference that can be 0,
* and the loop used to be generated at least once in that case.
	macro
	ROTR4CONT &loc,&n
	aif	&n>16,.dorotl
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotrloop
	mexit
.rotrloop
	lsr	a	;to set carry
	ror	&loc
	ror	&loc+2
&i	seta	&i-1
	aif	&i>0,.rotrloop
	ago	.end
.dorotl
	ROTL4	&loc,32-&n
.end
	mend
|
|
|
|
|
|
|
|
* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
*
* &loc  address of the low word; the high word is at &loc+2
* &n    rotate count, evaluated at assembly time
*
* A count above 16 is emitted as a right rotation by 32-&n (ROTR4).
* Otherwise one asl/rol/rol group is emitted per bit.  A holds a copy
* of the low word; shifting it left puts the next bit to carry into
* the high word into the carry flag, which limits one expansion to 16
* bits.  A count below 1 emits no code (the loop used to be generated
* at least once, so a count of 0 rotated by one bit).
* Uses A as scratch.
	macro
	ROTL4	&loc,&n
	aif	&n>16,.dorotr2
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotlgo2
	mexit
.rotlgo2
	lda	&loc
.rotlloop2
	asl	a	;to set carry
	rol	&loc+2
	rol	&loc
&i	seta	&i-1
	aif	&i>0,.rotlloop2
	ago	.end2
.dorotr2
	ROTR4	&loc,32-&n
.end2
	mend
|
|
|
|
|
|
|
|
* Left-rotate 32-bit value in &loc by &n positions, like ROTL4, but
* without the initial load:
* Expects the contents of &loc to be loaded in A already
*
* &loc  address of the low word; the high word is at &loc+2
* &n    rotate count, evaluated at assembly time
*
* A count above 16 is emitted as a right rotation by 32-&n through
* ROTR4, which loads A itself.  A count below 1 emits no code; callers
* such as ROTL4AUTO compute the count as a difference that can be 0,
* and the loop used to be generated at least once in that case.
	macro
	ROTL4CONT &loc,&n
	aif	&n>16,.dorotr2
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotlloop2
	mexit
.rotlloop2
	asl	a	;to set carry
	rol	&loc+2
	rol	&loc
&i	seta	&i-1
	aif	&i>0,.rotlloop2
	ago	.end2
.dorotr2
	ROTR4	&loc,32-&n
.end2
	mend
|
|
|
|
|
|
|
|
* &to := &from ROTR4 &n
*
* Copies the 32-bit value at &from (low word, then &from+2) to &to and
* right-rotates the copy by &n positions; &from is only read.
*
* &n is evaluated at assembly time.  A count above 16 is emitted as
* ROTL4MOVE by 32-&n.  A count below 1 emits the copy alone (the
* rotate loop used to be generated at least once, so a count of 0
* rotated the copy by one bit).
* Uses A as scratch.
	macro
	ROTR4MOVE &to,&from,&n
	aif	&n>16,.dorotl3
	lda	&from
	sta	&to
	lda	&from+2
	sta	&to+2
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotrloop3
	mexit
.rotrloop3
	lsr	a	;to set carry
	ror	&to
	ror	&to+2
&i	seta	&i-1
	aif	&i>0,.rotrloop3
	ago	.end3
.dorotl3
	ROTL4MOVE &to,&from,32-&n
.end3
	mend
|
|
|
|
|
|
|
|
* &to := &from ROTL4 &n
*
* Copies the 32-bit value at &from (high word at &from+2 first, then
* the low word) to &to and left-rotates the copy by &n positions;
* &from is only read.
*
* &n is evaluated at assembly time.  A count above 16 is emitted as
* ROTR4MOVE by 32-&n.  A count below 1 emits the copy alone (the
* rotate loop used to be generated at least once, so a count of 0
* rotated the copy by one bit).
* Uses A as scratch.
	macro
	ROTL4MOVE &to,&from,&n
	aif	&n>16,.dorotr4
	lda	&from+2
	sta	&to+2
	lda	&from
	sta	&to
	lcla	&i
&i	seta	&n
	aif	&i>0,.rotlloop4
	mexit
.rotlloop4
	asl	a	;to set carry
	rol	&to+2
	rol	&to
&i	seta	&i-1
	aif	&i>0,.rotlloop4
	ago	.end4
.dorotr4
	ROTR4MOVE &to,&from,32-&n
.end4
	mend
|
|
|
|
|
|
|
|
* Left-rotate with various optimizations applied
*
* Rotates the 32-bit value at &loc (low word at &loc, high word at
* &loc+2) left by &n, choosing the cheapest sequence for the count:
*
*   n 1..4    ROTL4 by n
*   n 5..7    rotate left 8 with byte moves, then right by 8-n
*   n 8..11   rotate left 8 with byte moves, then left by n-8
*   n 12..15  swap the two words (rotate by 16), then right by 16-n
*   n 16      swap the two words
*   n 17..20  swap the two words, then left by n-16
*   n 21..24  rotate left 24 with byte moves, then right by 24-n
*   n 25..31  ROTL4 by n (which emits a right rotation by 32-n)
*
* &n must be a plain assembly-time number in the range 1..31.
*
* &haveLocPlus2: if any third argument is given, A is taken to hold
* the word at &loc+2 already and the load is skipped.  Only the
* branches for n 12..20 make use of this.
*
* The byte-move branches build a word with lda &loc+3 / ora &loc-1,
* which gives the intended bytes only when the bytes at &loc-1 and
* &loc+4 are both zero.
*
* Changes from the earlier version: n = 8 and n = 24 no longer pass a
* zero count to ROTL4CONT / ROTR4CONT, n above 24 no longer passes a
* negative count, and the second .noload2 label is now .noload3.
* The expansion for every other count is unchanged.
* Uses A, X and Y as scratch, depending on the branch.
	macro
	ROTL4AUTO &loc,&n,&haveLocPlus2
	aif	&n>4,.skip1
	ROTL4	&loc,&n
	mexit
.skip1
	aif	&n>7,.skip2
	ldx	&loc+1
	lda	&loc+3
	ora	&loc-1
	sta	&loc
	stx	&loc+2
	txa		;ROTR4CONT wants the new high word in A
	ROTR4CONT &loc,8-&n
	mexit
.skip2
	aif	&n>11,.skip3
	ldx	&loc+1
	lda	&loc+3
	ora	&loc-1
	sta	&loc
	stx	&loc+2
	aif	&n>8,.more8
	mexit
.more8
	ROTL4CONT &loc,&n-8
	mexit
.skip3
	aif	&n>15,.skip4
	aif	C:&haveLocPlus2>0,.noload1
	ldy	&loc+2
	ago	.didload1
.noload1
	tay
.didload1
	lda	&loc
	sta	&loc+2
	sty	&loc
	ROTR4CONT &loc,16-&n
	mexit
.skip4
	aif	&n>16,.skip5
	aif	C:&haveLocPlus2>0,.noload2
	lda	&loc+2
.noload2
	ldy	&loc
	sta	&loc
	sty	&loc+2
	mexit
.skip5
	aif	&n>20,.skip6
	aif	C:&haveLocPlus2>0,.noload3
	lda	&loc+2
.noload3
	ldy	&loc
	sta	&loc
	sty	&loc+2
	ROTL4CONT &loc,&n-16
	mexit
.skip6
	aif	&n>24,.skip7
	ldx	&loc+1
	lda	&loc+3
	ora	&loc-1
	sta	&loc+2
	stx	&loc
	aif	&n>23,.done24
	ROTR4CONT &loc,24-&n
.done24
	mexit
.skip7
	ROTL4	&loc,&n
	mend
|
|
|
|
|
|
|
|
|
|
|
|
* This makes a function wrapper that is callable from C,
|
|
|
|
* taking a pointer to the context structure as its argument.
*
* The wrapper lifts its return address off the stack, pulls the low
* word of the argument into the direct page register, discards the
* argument's upper word, restores the return address, and calls &fn
* with jsl.  The caller's direct page and data bank registers are put
* back before returning, and the argument is gone from the stack.
*
* NOTE(review): the byte counts below assume 16-bit A, X and Y, and a
* 4-byte argument pushed by the caller - confirm against the C
* compiler's calling convention.
* NOTE(review): only the low word of the pointer is used (as the direct
* page), so the context presumably has to be located in bank 0 - confirm.
|
|
|
|
macro
|
|
|
|
CFunction &fn
|
|
|
|
* Push the data bank register (one byte) so that it and the 3-byte
* return address can be pulled together as two 16-bit values.
phb
|
|
|
|
plx
|
|
|
|
ply
|
|
|
|
* A := caller's direct page register, kept for restoring later.
tdc
|
|
|
|
* Direct page register := low word of the argument.
pld
|
|
|
|
* Pull and discard the remaining two bytes of the argument.
plb
|
|
|
|
plb
|
|
|
|
* Put the return address and saved data bank byte back on the stack.
phy
|
|
|
|
phx
|
|
|
|
* Restore the caller's data bank register.
plb
|
|
|
|
* Save the caller's direct page register across the call.
pha
|
|
|
|
jsl &fn
|
|
|
|
* Restore the caller's direct page register and return.
pld
|
|
|
|
rtl
|
|
|
|
mend
|
|
|
|
|
|
|
|
|
|
|
|
* One iteration of the loop for processing blocks.
|
|
|
|
* The a,b,c,d variables are given as parameters so we can avoid cycling them.
|
|
|
|
* shift is a per-round shift amount.
*
* Emitted code, with all values 32 bits wide (low word at x, high word
* at x+2):
*   temp := f(b,c,d) + a [+ constant]   (f and constant chosen by part)
*   temp := temp + the word of m selected through g_times_4 and idx
*   idx  := idx + 2
*   temp := temp rotated left by shift  (ROTL4AUTO)
*   a    := temp
* temp, idx, g_times_4 and m are symbols defined outside this macro.
* Uses A, X and Y as scratch.
*
* NOTE(review): &part is not a parameter of this macro; presumably it
* resolves to the parameter of the invoking BlockLoopPart - confirm.
* NOTE(review): the constants $5A827999 and $6ED9EBA1 and the three f
* functions match the MD4 rounds of RFC 1320 - presumably this is the
* MD4 block transform; confirm.
* NOTE(review): for some shift values ROTL4AUTO also reads the bytes at
* temp-1 and temp+4 and needs them to be zero - confirm at the
* definition of temp.
|
|
|
|
macro
|
|
|
|
BlockLoopIter &a,&b,&c,&d,&shift
|
|
|
|
|
|
|
|
|
* f_0 to f_15
* temp := (((c eor d) and b) eor d) + a, with no added constant.
* The carry from the low-word add feeds the high-word add; the
* instructions in between do not change the carry.
|
|
|
|
aif &part<>1,.skip1
|
|
|
|
lda &c
|
|
|
|
eor &d
|
|
|
|
and &b
|
|
|
|
eor &d
|
|
|
|
clc
|
|
|
|
adc &a
|
|
|
|
sta temp
|
|
|
|
|
|
|
|
|
lda &c+2
|
|
|
|
eor &d+2
|
|
|
|
and &b+2
|
|
|
|
eor &d+2
|
|
|
|
adc &a+2
|
|
|
|
sta temp+2
|
|
|
|
.skip1
|
|
|
|
|
|
|
|
|
* f_16 to f_31
* temp := (((c or d) and b) or (c and d)) + a + $5A827999.
* temp doubles as scratch for the intermediate value, X keeps a copy
* of c, and the partial sums are parked in Y (low) and X (high) until
* the constant is added.
|
|
|
|
aif &part<>2,.skip2
|
|
|
|
lda &c
|
|
|
|
tax
|
|
|
|
ora &d
|
|
|
|
and &b
|
|
|
|
sta temp
|
|
|
|
txa
|
|
|
|
and &d
|
|
|
|
ora temp
|
|
|
|
clc
|
|
|
|
adc &a
|
|
|
|
tay
|
|
|
|
|
|
|
|
|
lda &c+2
|
|
|
|
tax
|
|
|
|
ora &d+2
|
|
|
|
and &b+2
|
|
|
|
sta temp+2
|
|
|
|
txa
|
|
|
|
and &d+2
|
|
|
|
ora temp+2
|
|
|
|
adc &a+2
|
|
|
|
tax
|
|
|
|
|
|
|
|
|
* Add the 32-bit constant, low word then high word with carry.
clc
|
|
|
|
tya
|
|
|
|
adc #$7999
|
|
|
|
sta temp
|
|
|
|
txa
|
|
|
|
adc #$5A82
|
|
|
|
sta temp+2
|
|
|
|
.skip2
|
|
|
|
|
|
|
|
|
* f_32 to f_47
* temp := (b eor c eor d) + a + $6ED9EBA1.
* The partial sums are parked in Y (low) and X (high) until the
* constant is added.
|
|
|
|
aif &part<>3,.skip3
|
|
|
|
lda &b
|
|
|
|
eor &c
|
|
|
|
eor &d
|
|
|
|
clc
|
|
|
|
adc &a
|
|
|
|
tay
|
|
|
|
|
|
|
|
|
lda &b+2
|
|
|
|
eor &c+2
|
|
|
|
eor &d+2
|
|
|
|
adc &a+2
|
|
|
|
tax
|
|
|
|
|
|
|
|
|
* Add the 32-bit constant, low word then high word with carry.
clc
|
|
|
|
tya
|
|
|
|
adc #$EBA1
|
|
|
|
sta temp
|
|
|
|
txa
|
|
|
|
adc #$6ED9
|
|
|
|
sta temp+2
|
|
|
|
.skip3
|
|
|
|
|
|
|
|
|
* Common tail for all parts.
* X := the g_times_4 table entry at offset idx, used as a byte offset
* into m; the 32-bit value found there is added to temp.
ldy idx
|
|
|
|
ldx g_times_4,y
|
|
|
|
lda m,x
|
|
|
|
clc
|
|
|
|
adc temp
|
|
|
|
sta temp
|
|
|
|
inx
|
|
|
|
inx
|
|
|
|
lda m,x
|
|
|
|
adc temp+2
|
|
|
|
sta temp+2
|
|
|
|
|
|
|
|
|
* Advance idx by 2, i.e. to the next table entry.
iny
|
|
|
|
iny
|
|
|
|
sty idx
|
|
|
|
|
|
|
|
|
* A still holds the high word of temp here (iny and sty leave A
* alone), which is what the third argument tells ROTL4AUTO.
ROTL4AUTO temp,&shift,1
|
|
|
|
|
|
|
|
|
* Store the rotated result into the first parameter.
lda temp
|
|
|
|
sta &a
|
|
|
|
lda temp+2
|
|
|
|
sta &a+2
|
|
|
|
|
|
|
|
|
mend
|
|
|
|
|
|
|
|
|
|
|
|
* One part of the loop for processing blocks (16 iterations)
*
* &part selects which of the three groups of four BlockLoopIter
* expansions is assembled (1, 2 or 3); the groups differ only in their
* shift amounts.  The four expansions permute a_,b,c,d so that each
* one writes a different variable.
*
* The generated code is a loop between the labels loop&part and
* endloop&part.  Each BlockLoopIter adds 2 to idx, so one pass adds 8,
* and the loop repeats until idx reaches 16*2*&part.
*
* NOTE(review): 16 iterations per part holds if idx enters the part at
* 16*2*(&part-1) - confirm at the call site.
|
|
|
|
macro
|
|
|
|
BlockLoopPart &part
|
|
|
|
|
|
|
|
|
loop&part anop
|
|
|
|
|
|
|
|
|
* Shifts for part 1: 3, 7, 11, 19
aif &part<>1,.skip1a
|
|
|
|
BlockLoopIter a_,b,c,d,3
|
|
|
|
BlockLoopIter d,a_,b,c,7
|
|
|
|
BlockLoopIter c,d,a_,b,11
|
|
|
|
BlockLoopIter b,c,d,a_,19
|
|
|
|
.skip1a
|
|
|
|
* Shifts for part 2: 3, 5, 9, 13
aif &part<>2,.skip2a
|
|
|
|
BlockLoopIter a_,b,c,d,3
|
|
|
|
BlockLoopIter d,a_,b,c,5
|
|
|
|
BlockLoopIter c,d,a_,b,9
|
|
|
|
BlockLoopIter b,c,d,a_,13
|
|
|
|
.skip2a
|
|
|
|
* Shifts for part 3: 3, 9, 11, 15
aif &part<>3,.skip3a
|
|
|
|
BlockLoopIter a_,b,c,d,3
|
|
|
|
BlockLoopIter d,a_,b,c,9
|
|
|
|
BlockLoopIter c,d,a_,b,11
|
|
|
|
BlockLoopIter b,c,d,a_,15
|
|
|
|
.skip3a
|
|
|
|
|
|
|
|
|
* Leave the loop once idx has reached the end of this part; otherwise
* go round again.  The backward jump is a jmp, presumably because the
* expanded body is too long for a relative branch - confirm.
lda idx
|
|
|
|
cmp #16*2*&part
|
|
|
|
bge endloop&part
|
|
|
|
jmp loop&part
|
|
|
|
endloop&part anop
|
|
|
|
mend
|
|
|
|
|