mirror of
https://github.com/sheumann/65816-crypto.git
synced 2024-11-25 10:30:45 +00:00
465 lines
6.8 KiB
Plaintext
465 lines
6.8 KiB
Plaintext
* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
|
|
|
|
|
* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
*
* Parameters:
*   &loc - address of the low 16-bit word; the high word is at &loc+2
*   &n   - rotate count, evaluated at assembly time
*
* The rotate is fully unrolled: one lsr/ror/ror group is generated per bit.
* Counts above 16 are turned into the shorter left rotate ROTL4 by 32-&n.
* A and the flags are overwritten.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
* NOTE(review): the code treats &loc and &loc+2 as 16-bit halves, so it
* presumably requires 16-bit accumulator/memory mode -- confirm at call sites.
         macro
         ROTR4 &loc,&n
         aif   &n>16,.dorotl
         lda   &loc+2
         lcla  &i
&i       seta  &n
.rotrloop
; bit 0 of the high word (in A) goes to carry, then into bit 15 of the low
; word; bit 0 of the low word then goes into bit 15 of the high word
         lsr   a                        ;to set carry
         ror   &loc
         ror   &loc+2
&i       seta  &i-1
         aif   &i>0,.rotrloop
         ago   .end
.dorotl
         ROTL4 &loc,32-&n
.end
         mend
|
|
|
|
* Right-rotate the 32-bit value in &loc by &n positions, like ROTR4, but
* without the initial load.
* Expects the contents of &loc+2 to be loaded in A already
*
* Parameters:
*   &loc - address of the low 16-bit word; the high word is at &loc+2
*   &n   - rotate count, evaluated at assembly time
*
* For counts above 16 the work is handed to ROTL4 with 32-&n; ROTL4 loads
* A itself, so the preloaded value is not used on that path.
* A and the flags are overwritten.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
         macro
         ROTR4CONT &loc,&n
         aif   &n>16,.dorotl
         lcla  &i
&i       seta  &n
.rotrloop
; A mirrors the high word: its bit 0 is moved to carry for the ror pair
         lsr   a                        ;to set carry
         ror   &loc
         ror   &loc+2
&i       seta  &i-1
         aif   &i>0,.rotrloop
         ago   .end
.dorotl
         ROTL4 &loc,32-&n
.end
         mend
|
|
|
|
* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
*
* Parameters:
*   &loc - address of the low 16-bit word; the high word is at &loc+2
*   &n   - rotate count, evaluated at assembly time
*
* The rotate is fully unrolled: one asl/rol/rol group is generated per bit.
* Counts above 16 are turned into the shorter right rotate ROTR4 by 32-&n.
* A and the flags are overwritten.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
* NOTE(review): the code treats &loc and &loc+2 as 16-bit halves, so it
* presumably requires 16-bit accumulator/memory mode -- confirm at call sites.
         macro
         ROTL4 &loc,&n
         aif   &n>16,.dorotr2
         lda   &loc
         lcla  &i
&i       seta  &n
.rotlloop2
; bit 15 of the low word (in A) goes to carry, then into bit 0 of the high
; word; bit 15 of the high word then goes into bit 0 of the low word
         asl   a                        ;to set carry
         rol   &loc+2
         rol   &loc
&i       seta  &i-1
         aif   &i>0,.rotlloop2
         ago   .end2
.dorotr2
         ROTR4 &loc,32-&n
.end2
         mend
|
|
|
|
* Left-rotate the 32-bit value in &loc by &n positions, like ROTL4, but
* without the initial load.
* Expects the contents of &loc to be loaded in A already
*
* Parameters:
*   &loc - address of the low 16-bit word; the high word is at &loc+2
*   &n   - rotate count, evaluated at assembly time
*
* For counts above 16 the work is handed to ROTR4 with 32-&n; ROTR4 loads
* A itself, so the preloaded value is not used on that path.
* A and the flags are overwritten.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
         macro
         ROTL4CONT &loc,&n
         aif   &n>16,.dorotr2
         lcla  &i
&i       seta  &n
.rotlloop2
; A mirrors the low word: its bit 15 is moved to carry for the rol pair
         asl   a                        ;to set carry
         rol   &loc+2
         rol   &loc
&i       seta  &i-1
         aif   &i>0,.rotlloop2
         ago   .end2
.dorotr2
         ROTR4 &loc,32-&n
.end2
         mend
|
|
|
|
* &to := &from ROTR4 &n
*
* Copies the 32-bit value at &from to &to and right-rotates the copy in
* place by &n positions.
*
* Parameters:
*   &to   - address of the destination low word (high word at &to+2)
*   &from - address of the source low word (high word at &from+2)
*   &n    - rotate count, evaluated at assembly time
*
* The high word is copied last so that A already holds it when the
* unrolled rotate starts.  Counts above 16 are handed to ROTL4MOVE with
* 32-&n.  A and the flags are overwritten; &from is only read.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
         macro
         ROTR4MOVE &to,&from,&n
         aif   &n>16,.dorotl3
         lda   &from
         sta   &to
         lda   &from+2
         sta   &to+2
         lcla  &i
&i       seta  &n
.rotrloop3
         lsr   a                        ;to set carry
         ror   &to
         ror   &to+2
&i       seta  &i-1
         aif   &i>0,.rotrloop3
         ago   .end3
.dorotl3
         ROTL4MOVE &to,&from,32-&n
.end3
         mend
|
|
|
|
* &to := &from ROTL4 &n
*
* Copies the 32-bit value at &from to &to and left-rotates the copy in
* place by &n positions.
*
* Parameters:
*   &to   - address of the destination low word (high word at &to+2)
*   &from - address of the source low word (high word at &from+2)
*   &n    - rotate count, evaluated at assembly time
*
* The low word is copied last so that A already holds it when the
* unrolled rotate starts.  Counts above 16 are handed to ROTR4MOVE with
* 32-&n.  A and the flags are overwritten; &from is only read.
*
* NOTE: the rotate group is generated before the counter is tested, so a
* count of 0 still produces a one-bit rotate.  Use counts from 1 to 31.
         macro
         ROTL4MOVE &to,&from,&n
         aif   &n>16,.dorotr4
         lda   &from+2
         sta   &to+2
         lda   &from
         sta   &to
         lcla  &i
&i       seta  &n
.rotlloop4
         asl   a                        ;to set carry
         rol   &to+2
         rol   &to
&i       seta  &i-1
         aif   &i>0,.rotlloop4
         ago   .end4
.dorotr4
         ROTR4MOVE &to,&from,32-&n
.end4
         mend
|
|
|
|
|
|
* This makes a function wrapper that is callable from C,
* taking a pointer to the context structure as its argument.
*
* Parameter:
*   &fn - routine to call (via jsl) once the direct page has been switched
*
* What the sequence does:
*   1. Lifts the saved data bank byte and the return address off the
*      stack into X and Y.
*   2. Pulls the next two stack bytes into the direct page register, so
*      &fn runs with D set to the low 16 bits of the argument, and pulls
*      two more bytes to drop the rest of the argument.
*   3. Pushes the return address back, restores the data bank, and saves
*      the previous direct page (captured in A by tdc) for after the call.
*   4. Calls &fn, restores the previous direct page and returns with rtl;
*      the argument is no longer on the stack at that point.
*
* A, X and Y are overwritten before &fn is called.
*
* NOTE(review): the byte counts assume native mode with 16-bit A/X/Y and
* a caller that pushed a 4-byte pointer and then used JSL -- confirm
* against the C compiler's calling convention.  Only the low 16 bits of
* the pointer are used, so the context presumably must be reachable
* through the direct page (bank 0) -- verify.
         macro
         CFunction &fn
         phb                            ;save data bank on the stack
         plx                            ;X = saved bank + return addr low
         ply                            ;Y = rest of the return address
         tdc                            ;A = current direct page register
         pld                            ;D = low 16 bits of the argument
         plb                            ;drop third argument byte
         plb                            ;drop fourth argument byte
         phy                            ;put the return address back,
         phx                            ;with the saved bank on top
         plb                            ;restore the data bank
         pha                            ;save the old direct page
         jsl   &fn
         pld                            ;restore the old direct page
         rtl
         mend
|
|
|
|
|
|
* Macros to operate on elements of the message schedule (W)
*
* lda_w: load A from element &i of w, with the index wrapped into 0..15.
*
* Parameters:
*   &lab - optional label placed on the generated lda
*   &i   - element index, evaluated at assembly time; 16 is subtracted
*          repeatedly until the value is below 16 (negative values are
*          not adjusted)
*   &inc - optional byte offset added to the element address; treated as
*          0 when omitted
*
* Generates a single instruction:  lda w+(index)*4+offset
         macro
&lab     lda_w &i,&inc
         lcla  &j
&j       seta  &i
.modloop1
; wrap the index into the 16-entry buffer
         aif   &j<16,.goodidx1
&j       seta  &j-16
         ago   .modloop1
.goodidx1
; no offset operand given: declare it locally so it expands as 0
         aif   C:&inc<>0,.haveinc1
         lcla  &inc
.haveinc1
&lab     lda   w+(&j)*4+&inc
         mend
|
|
|
|
* sta_w: store A to element &i of w, with the index wrapped into 0..15.
*
* Parameters:
*   &lab - optional label placed on the generated sta
*   &i   - element index, evaluated at assembly time; 16 is subtracted
*          repeatedly until the value is below 16 (negative values are
*          not adjusted)
*   &inc - optional byte offset added to the element address; treated as
*          0 when omitted
*
* Generates a single instruction:  sta w+(index)*4+offset
         macro
&lab     sta_w &i,&inc
         lcla  &j
&j       seta  &i
.modloop3
; wrap the index into the 16-entry buffer
         aif   &j<16,.goodidx3
&j       seta  &j-16
         ago   .modloop3
.goodidx3
; no offset operand given: declare it locally so it expands as 0
         aif   C:&inc<>0,.haveinc3
         lcla  &inc
.haveinc3
&lab     sta   w+(&j)*4+&inc
         mend
|
|
|
|
* adc_w: add (with carry) element &i of w to A, with the index wrapped
* into 0..15.  The carry is not cleared here; the caller sets it up.
*
* Parameters:
*   &lab - optional label placed on the generated adc
*   &i   - element index, evaluated at assembly time; 16 is subtracted
*          repeatedly until the value is below 16 (negative values are
*          not adjusted)
*   &inc - optional byte offset added to the element address; treated as
*          0 when omitted
*
* Generates a single instruction:  adc w+(index)*4+offset
         macro
&lab     adc_w &i,&inc
         lcla  &j
&j       seta  &i
.modloop4
; wrap the index into the 16-entry buffer
         aif   &j<16,.goodidx4
&j       seta  &j-16
         ago   .modloop4
.goodidx4
; no offset operand given: declare it locally so it expands as 0
         aif   C:&inc<>0,.haveinc4
         lcla  &inc
.haveinc4
&lab     adc   w+(&j)*4+&inc
         mend
|
|
|
|
|
|
* Compute one part of the message schedule (16 elements)
*
* Parameter:
*   &part - which group of 16 elements to produce, evaluated at assembly
*           time
*
* &part = 1: reverses the byte order of each of the 16 words w[0..15] in
*   place; nothing else is computed.
* any other &part: for i = (&part-1)*16 up to &part*16-1 computes
*     sigma_0(W[i-15]) + W[i-16] + sigma_1(W[i-2]) + W[i-7]
*   and stores it through sta_w, which wraps the index, so w is used as a
*   16-entry circular buffer.  As coded:
*     sigma_0 = rotate right 7  xor rotate right 18 xor shift right 3
*     sigma_1 = rotate right 17 xor rotate right 19 xor shift right 10
*   NOTE(review): these amounts match the SHA-256 message schedule
*   functions; the in-line comment speaks of W_16 to W_63, i.e. parts
*   2 to 4 -- confirm against the caller.
*
* Everything is unrolled at assembly time (16 copies of the loop body).
* Uses temp1, temp2, temp3 as scratch and overwrites A, X, Y and flags.
         macro
         ComputeSchedule &part
         lcla  &i

; Flip the endianness of W_0 to W_15 (the current block of the message)
         aif   &part<>1,.skippart1
.loop1
; swap bytes within each half and exchange the two halves
         lda   w+&i*4
         xba
         ldx   w+&i*4+2
         sta   w+&i*4+2
         txa
         xba
         sta   w+&i*4
&i       seta  &i+1
         aif   &i<16,.loop1
         ago   .end
.skippart1

; compute the rest of the message schedule (W_16 to W_63)
&i       seta  (&part-1)*16
.loop2
; sigma_0 + w[i-16] computation
; temp1 = word rotated right by 8 (assembled from byte-offset loads),
; then rotated left by 1, giving rotate right 7
         lda_w &i-15,-1
         and   #$FF00
         sta   temp1+2
         lda_w &i-15,3
         and   #$00FF
         ora   temp1+2
         sta   temp1+2
         lda_w &i-15,1
         sta   temp1
         ROTL4CONT temp1,1
; temp2 = halves swapped (rotate by 16), then rotated right by 2,
; giving rotate right 18
         lda_w &i-15,2
         sta   temp2
         lda_w &i-15
         sta   temp2+2
         ROTR4CONT temp2,2
; temp3 = word shifted right by 3; the low word of the result is left
; in A after the final ror
         lda_w &i-15,2
         lsr   a
         sta   temp3+2
         lda_w &i-15
         ror   a
         sta   temp3
         lsr   temp3+2
         ror   temp3
         lsr   temp3+2
         lda   temp3
         ror   a
; xor the three terms and add w[i-16]; the sum is kept in temp1
         eor   temp2
         eor   temp1
         clc
         adc_w &i-16
         sta   temp1
         lda   temp3+2
         eor   temp2+2
         eor   temp1+2
         adc_w &i-16,2
         sta   temp1+2

; sigma_1 + w[i-7] computation
; temp2 = halves swapped (rotate by 16), then rotated right by 1,
; giving rotate right 17; temp3 = temp2 rotated right by 2 more (19)
         lda_w &i-2,2
         sta   temp2
         lda_w &i-2
         sta   temp2+2
         ROTR4CONT temp2,1
         ROTR4MOVE temp3,temp2,2
; shift right by 10: byte-offset loads give the shift by 8, then two
; single-bit shifts with the high word in X and the low word in Y/A
         lda_w &i-2,3
         and   #$00FF
         lsr   a
         tax
         lda_w &i-2,1
         ror   a
         tay
         txa
         lsr   a
         tax
         tya
         ror   a
; xor the three terms and add w[i-7]; result low word in Y, high in X
         eor   temp3
         eor   temp2
         clc
         adc_w &i-7
         tay
         txa
         eor   temp3+2
         eor   temp2+2
         adc_w &i-7,2
         tax

; add the two partial sums and store the new schedule word
         clc
         tya
         adc   temp1
         sta_w &i
         txa
         adc   temp1+2
         sta_w &i,2

&i       seta  &i+1
         aif   &i<&part*16,.loop2
.end
         mend
|
|
|
|
|
|
|
|
* One iteration of the loop for processing blocks.
* The a,b,c,d,e,f,g,h variables are given as parameters so we can avoid
* cycling them.
*
* Parameters:
*   &a..&h  - addresses of the eight 32-bit working variables in their
*             current roles (low word at the address, high word at +2)
*   &iter   - assembly-time position of this iteration within the
*             unrolled group; the schedule word is read from
*             w+&iter*4 indexed by the run-time value of idx
*   &k_idx1 - direct-page pointer used as (&k_idx1),y with Y = k_ptr to
*             fetch the low word of the round constant
*   &k_idx2 - same, for the high word of the round constant
*
* Effects, as coded below:
*   T1  = &h + Sigma_1(&e) + ch(&e,&f,&g) + W + K
*   &d := &d + T1
*   &h := T1 + Sigma_0(&a) + maj(&a,&b,&c)
*   with Sigma_1 = rotate right 6 xor 11 xor 25,
*        Sigma_0 = rotate right 2 xor 13 xor 22,
*        ch  = ((f xor g) and e) xor g,
*        maj = (a and b) or ((a or b) and c)
* The caller advances the roles by passing the names shifted by one on
* the next invocation instead of moving data.
*
* Uses temp1..temp4 as scratch and overwrites A, X, Y and flags.
*
* NOTE(review): the byte-rotated words are built with lda x-1 / ora x+3
* and no masking, which only works if the byte just below and the byte
* just above each working variable are zero -- presumably the variable
* layout provides that padding; verify where a_ .. h are defined.
         macro
         BlockLoopIter &a,&b,&c,&d,&e,&f,&g,&h,&iter,&k_idx1,&k_idx2

; Sigma_1+w[i] computation
; temp1 and temp2 both start as e rotated right by 8 (byte-offset loads)
         lda   &e+1
         sta   temp1
         sta   temp2
         lda   &e-1
         ora   &e+3
         sta   temp1+2
         sta   temp2+2
; 8 right then 3 right = rotate right 11; 8 right then 2 left = 6
         ROTR4CONT temp2,3
         ROTL4 temp1,2
; temp3 = e rotated right by 24, then 1 more = rotate right 25
         lda   &e-1
         ora   &e+3
         sta   temp3
         lda   &e+1
         sta   temp3+2
         ROTR4CONT temp3,1
; xor the three rotations and add the schedule word selected by idx
         lda   temp1
         eor   temp2
         eor   temp3
         clc
         ldx   idx
         adc   w+&iter*4,x
         sta   temp1
         lda   temp1+2
         eor   temp2+2
         eor   temp3+2
         adc   w+&iter*4+2,x
         sta   temp1+2

; ch+Sigma_1+W[i] computation
         lda   &f
         eor   &g
         and   &e
         eor   &g
         clc
         adc   temp1
         sta   temp2
         lda   &f+2
         eor   &g+2
         and   &e+2
         eor   &g+2
         adc   temp1+2
         sta   temp2+2

; T_1 computation
; the running sum is parked in Y (low) and X (high) while Y is reloaded
; with k_ptr for the round constant fetch
         clc
         lda   &h
         adc   temp2
         tay
         lda   &h+2
         adc   temp2+2
         tax
         clc
         tya
         ldy   k_ptr
         adc   (&k_idx1),y
         sta   temp1
         txa
         adc   (&k_idx2),y
         sta   temp1+2
         tax

; add T_1 into d in place
         clc
         lda   temp1
         adc   &d
         sta   &d
         txa
         adc   &d+2
         sta   &d+2

;Sigma_0+T_1 computation
; temp2 = a rotated right by 2
         ROTR4MOVE temp2,&a,2
; temp3 = a with halves swapped (16), then 3 left = rotate right 13
         lda   &a
         sta   temp3+2
         lda   &a+2
         sta   temp3
         ROTL4CONT temp3,3
; temp4 = a rotated left by 8, then 2 more left = rotate right 22
         lda   &a+1
         sta   temp4+2
         lda   &a-1
         ora   &a+3
         sta   temp4
         ROTL4CONT temp4,2
         lda   temp2
         eor   temp3
         eor   temp4
         clc
         adc   temp1
         sta   temp2
         lda   temp2+2
         eor   temp3+2
         eor   temp4+2
         adc   temp1+2
         sta   temp2+2

;maj and T_2 computation (saved to &h)
; the logical operations between the two adc instructions leave the
; carry from the low-word add untouched
         lda   &a
         ora   &b
         and   &c
         sta   temp3
         lda   &a
         and   &b
         ora   temp3
         clc
         adc   temp2
         sta   &h
         lda   &a+2
         ora   &b+2
         and   &c+2
         sta   temp3+2
         lda   &a+2
         and   &b+2
         ora   temp3+2
         adc   temp2+2
         sta   &h+2

         mend
|
|
|
|
|
|
* One part of the loop for processing blocks (16 iterations)
*
* Parameter:
*   &part - only used to make the two generated labels (loop&part and
*           endloop&part) unique per expansion
*
* The body holds eight BlockLoopIter expansions whose variable names are
* shifted by one position each time, so after eight of them the names
* are back in their original roles.  The body is executed twice at run
* time (idx = 0, then idx = 32), giving the 16 iterations.
*
* On every pass k_ptr is advanced by 32 bytes (8 entries of 4 bytes).
* idx is reset to 0 on entry and is not updated on the final pass.
* Overwrites A and flags in addition to everything BlockLoopIter uses.
         macro
         BlockLoopPart &part

         stz   idx
loop&part anop

         BlockLoopIter a_,b,c,d,e,f,g,h,0,zero,two
         BlockLoopIter h,a_,b,c,d,e,f,g,1,four,six
         BlockLoopIter g,h,a_,b,c,d,e,f,2,eight,ten
         BlockLoopIter f,g,h,a_,b,c,d,e,3,twelve,fourteen
         BlockLoopIter e,f,g,h,a_,b,c,d,4,sixteen,eighteen
         BlockLoopIter d,e,f,g,h,a_,b,c,5,twenty,twentytwo
         BlockLoopIter c,d,e,f,g,h,a_,b,6,twentyfour,twentysix
         BlockLoopIter b,c,d,e,f,g,h,a_,7,twentyeight,thirty

; step the round constant offset past the 8 entries just used
         clc
         lda   k_ptr
         adc   #4*8
         sta   k_ptr

; step the schedule offset; leave once all 16 entries are consumed
         clc
         lda   idx
         adc   #4*8
         cmp   #16*4
         bge   endloop&part
         sta   idx
         jmp   loop&part
endloop&part anop
         mend
|
|
|