Add initial implementation of the basic block-processing function for SHA-256.

This commit is contained in:
Stephen Heumann 2017-07-03 14:27:19 -05:00
parent 2c34d17925
commit 91c107eb98
4 changed files with 850 additions and 0 deletions

253
sha256.asm Normal file
View File

@ -0,0 +1,253 @@
* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* Implementation of the SHA-256 (and SHA-224) hash function for the 65816
*
* The basic structure of the hash computation is described in FIPS PUB 180-4,
* although this implementation rearranges some things for better performance.
case on
mcopy sha256.macros
* Direct page locations
* These are byte offsets from the direct page register. The CFunction wrappers
* load the direct page register from the context pointer passed by the C
* caller, so these offsets are laid out to match the fields of
* struct sha256_context in sha256.h.
* length: 8 bytes; SHA256_INIT clears all four words
length gequ 0
* extra: 16-bit word cleared by SHA256_INIT; not otherwise used in the code
* visible here
extra gequ 8
* idx: byte offset into w of the current group of eight rounds
* (set and advanced by BlockLoopPart, read by BlockLoopIter)
idx gequ 10
* a_ .. h: the eight 32-bit working variables, low word first
a_ gequ 12 ; elements of state
b gequ 16
c gequ 20
d gequ 24
e gequ 28
f gequ 32
g gequ 36
h gequ 40
* temp1, temp2: 32-bit scratch values used by the schedule and round macros
temp1 gequ 44
temp2 gequ 48
* ch, maj: space is set aside here, but the code visible in this file and in
* sha256.macros does not reference these two names
ch gequ 52
maj gequ 56
* h0 .. h7: the current hash value, eight 32-bit words, low word first
h0 gequ 60
h1 gequ 64
h2 gequ 68
h3 gequ 72
h4 gequ 76
h5 gequ 80
h6 gequ 84
h7 gequ 88
* w: sixteen 32-bit words; holds the message block and is then reused in
* place as a circular buffer for the message schedule
w gequ 92
* temp3, temp4: further 32-bit scratch values
temp3 gequ 156
temp4 gequ 160
* k_ptr: 16-bit pointer that steps through the round-constant table k
k_ptr gequ 164
* Round constants K_0 .. K_63 for SHA-256 (FIPS PUB 180-4, section 4.2.2),
* four per line in round order. Each is emitted as a 4-byte integer (dc i4).
* SHA256_PROCESSBLOCK stores the address of this table in k_ptr and the round
* macro reads it sequentially, one constant per round.
k private
dc i4'$428a2f98, $71374491, $b5c0fbcf, $e9b5dba5'
dc i4'$3956c25b, $59f111f1, $923f82a4, $ab1c5ed5'
dc i4'$d807aa98, $12835b01, $243185be, $550c7dc3'
dc i4'$72be5d74, $80deb1fe, $9bdc06a7, $c19bf174'
dc i4'$e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc'
dc i4'$2de92c6f, $4a7484aa, $5cb0a9dc, $76f988da'
dc i4'$983e5152, $a831c66d, $b00327c8, $bf597fc7'
dc i4'$c6e00bf3, $d5a79147, $06ca6351, $14292967'
dc i4'$27b70a85, $2e1b2138, $4d2c6dfc, $53380d13'
dc i4'$650a7354, $766a0abb, $81c2c92e, $92722c85'
dc i4'$a2bfe8a1, $a81a664b, $c24b8b70, $c76c51a3'
dc i4'$d192e819, $d6990624, $f40e3585, $106aa070'
dc i4'$19a4c116, $1e376c08, $2748774c, $34b0bcb5'
dc i4'$391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3'
dc i4'$748f82ee, $78a5636f, $84c87814, $8cc70208'
dc i4'$90befffa, $a4506ceb, $bef9a3f7, $c67178f2'
end
* Initialize a SHA-256 context.
* This must be called before any of the other SHA-256 functions.
*
* C-callable entry point (lower-case name; the file is assembled with
* "case on", so it is distinct from SHA256_INIT). The CFunction macro expands
* to a wrapper that makes the context pointer argument the direct page,
* calls SHA256_INIT with JSL, and then restores the caller's direct page.
sha256_init start
CFunction SHA256_INIT
end
* SHA256_INIT: reset the context addressed by the direct page.
* Clears the 8-byte length counter and the 'extra' word, and loads h0..h7
* with the SHA-256 initial hash value (FIPS PUB 180-4, section 5.3.3).
* Each 32-bit value is stored low word first. Returns with RTL.
SHA256_INIT start
; Counters first: STZ changes neither A nor the flags
        stz   length
        stz   length+2
        stz   length+4
        stz   length+6
        stz   extra

; H0 = $6a09e667
        lda   #$e667
        sta   h0
        lda   #$6a09
        sta   h0+2
; H1 = $bb67ae85
        lda   #$ae85
        sta   h1
        lda   #$bb67
        sta   h1+2
; H2 = $3c6ef372
        lda   #$f372
        sta   h2
        lda   #$3c6e
        sta   h2+2
; H3 = $a54ff53a
        lda   #$f53a
        sta   h3
        lda   #$a54f
        sta   h3+2
; H4 = $510e527f
        lda   #$527f
        sta   h4
        lda   #$510e
        sta   h4+2
; H5 = $9b05688c
        lda   #$688c
        sta   h5
        lda   #$9b05
        sta   h5+2
; H6 = $1f83d9ab
        lda   #$d9ab
        sta   h6
        lda   #$1f83
        sta   h6+2
; H7 = $5be0cd19
        lda   #$cd19
        sta   h7
        lda   #$5be0
        sta   h7+2

        rtl
        end
* Process one 64-byte block through the SHA-256 hashing function.
* This is a low-level function; users should normally not call this directly.
*
* C-callable entry point (lower-case name; distinct from SHA256_PROCESSBLOCK
* because the file is assembled with "case on"). The CFunction macro expands
* to a wrapper that makes the context pointer argument the direct page,
* calls SHA256_PROCESSBLOCK with JSL, and then restores the caller's direct
* page.
sha256_processblock start
CFunction SHA256_PROCESSBLOCK
end
* SHA256_PROCESSBLOCK: run the SHA-256 compression function on one block.
*
* Input:  direct page = context. w holds the 64-byte block in message byte
*         order; h0..h7 hold the current hash value.
* Output: h0..h7 are updated with the new hash value.
* Overwrites a_..h, w (it is reused in place for the message schedule),
* idx, k_ptr, temp1..temp4 and the A, X and Y registers.
* Returns with RTL, so it must be called with JSL.
*
* The 64 rounds are generated as four parts of 16 rounds. Before each part
* the next 16 message-schedule words are computed into w.
*
* NOTE(review): the round constants are read through (k_ptr), a 16-bit
* pointer, so the table k presumably has to be reachable through the data
* bank register that is current when this routine runs - confirm.
SHA256_PROCESSBLOCK start

; Working variables a..h start out as the current hash value
        lda   h0
        sta   a_
        lda   h0+2
        sta   a_+2

        lda   h1
        sta   b
        lda   h1+2
        sta   b+2

        lda   h2
        sta   c
        lda   h2+2
        sta   c+2

        lda   h3
        sta   d
        lda   h3+2
        sta   d+2

        lda   h4
        sta   e
        lda   h4+2
        sta   e+2

        lda   h5
        sta   f
        lda   h5+2
        sta   f+2

        lda   h6
        sta   g
        lda   h6+2
        sta   g+2

        lda   h7
        sta   h
        lda   h7+2
        sta   h+2

; k_ptr walks through the round constants; each round advances it by 4
        lda   #k
        sta   k_ptr

; Rounds 0-15 use the byte-swapped message block itself
        ComputeSchedule 1
        BlockLoopPart 1
; Rounds 16-31, 32-47 and 48-63 each need 16 more schedule words first
        jsr   ComputeScheduleSub
        BlockLoopPart 2
        jsr   ComputeScheduleSub
        BlockLoopPart 3
        jsr   ComputeScheduleSub
        BlockLoopPart 4

; Add the working variables into the hash value (32-bit adds: the carry
; from the low word flows into the high word)
endloop clc
        lda   h0
        adc   a_
        sta   h0
        lda   h0+2
        adc   a_+2
        sta   h0+2

        clc
        lda   h1
        adc   b
        sta   h1
        lda   h1+2
        adc   b+2
        sta   h1+2

        clc
        lda   h2
        adc   c
        sta   h2
        lda   h2+2
        adc   c+2
        sta   h2+2

        clc
        lda   h3
        adc   d
        sta   h3
        lda   h3+2
        adc   d+2
        sta   h3+2

        clc
        lda   h4
        adc   e
        sta   h4
        lda   h4+2
        adc   e+2
        sta   h4+2

        clc
        lda   h5
        adc   f
        sta   h5
        lda   h5+2
        adc   f+2
        sta   h5+2

        clc
        lda   h6
        adc   g
        sta   h6
        lda   h6+2
        adc   g+2
        sta   h6+2

        clc
        lda   h7
        adc   h
        sta   h7
        lda   h7+2
        adc   h+2
        sta   h7+2
        rtl

; Shared subroutine: compute the next 16 message-schedule words in place.
; w is indexed modulo 16, so one expansion of the macro serves parts 2-4.
ComputeScheduleSub anop
        ComputeSchedule 2
        rts
        end

53
sha256.h Normal file
View File

@ -0,0 +1,53 @@
/*
* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#ifndef SHA256_H
#define SHA256_H

/*
 * State for one SHA-256 computation.
 *
 * The context structure must be in bank 0, preferably page-aligned.
 *
 * The assembly implementation addresses the context as its direct page, so
 * the order and size of these fields must stay in step with the direct-page
 * equates in sha256.asm. Do not reorder or resize them.
 */
struct sha256_context {
    unsigned long length;           /* with length2: 8-byte area zeroed by sha256_init */
    unsigned long length2;
    unsigned short extra;           /* zeroed by sha256_init */
    unsigned short idx;             /* scratch index used while a block is processed */
    unsigned long vars[8];          /* working variables a..h */
    unsigned char reserved1[16];    /* scratch space for the assembly code */
    unsigned char hash[32];         /* hash value: eight 32-bit words, each stored
                                       least significant byte first */
    unsigned char block[64];        /* current message block; overwritten by
                                       sha256_processblock */
    unsigned char reserved2[16];    /* scratch space for the assembly code */
};

/*
 * Initialize a SHA-256 context.
 * This must be called before any of the other SHA-256 functions.
 */
void sha256_init(struct sha256_context *context);

/*
 * Update a SHA-256 context based on the specified data.
 */
void sha256_update(struct sha256_context *context, const unsigned char *data, unsigned long length);

/*
 * Finish SHA-256 processing and generate the final hash code.
 */
void sha256_finalize(struct sha256_context *context);

/*
 * Process one 64-byte block through the SHA-256 hashing function.
 * This is a low-level function; users should normally not call this directly.
 * The block is taken from context->block and the result is accumulated
 * into context->hash.
 */
void sha256_processblock(struct sha256_context *context);

#endif /* SHA256_H */

437
sha256.macros Normal file
View File

@ -0,0 +1,437 @@
* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
* The value is stored low word first (&loc = low word, &loc+2 = high word).
* &n is an assembly-time constant: the rotate is unrolled into &n
* three-instruction steps. For &n > 16 a left rotate by 32-&n is generated
* instead. The step count is tested after each step, so at least one step
* is always emitted (&n = 0 would still rotate by one). Overwrites A.
macro
ROTR4 &loc,&n
aif &n>16,.dorotl
; A = copy of the high word; each LSR moves its next bit into the carry
lda &loc+2
lcla &i
&i seta &n
.rotrloop
; One step: the high word's bit 0 enters the top of the low word, and the
; low word's bit 0 enters the top of the high word
lsr a ;to set carry
ror &loc
ror &loc+2
&i seta &i-1
aif &i>0,.rotrloop
ago .end
.dorotl
ROTL4 &loc,32-&n
.end
mend
* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
* The value is stored low word first (&loc = low word, &loc+2 = high word).
* &n is an assembly-time constant: the rotate is unrolled into &n
* three-instruction steps. For &n > 16 a right rotate by 32-&n is generated
* instead. The step count is tested after each step, so at least one step
* is always emitted (&n = 0 would still rotate by one). Overwrites A.
macro
ROTL4 &loc,&n
aif &n>16,.dorotr2
; A = copy of the low word; each ASL moves its next top bit into the carry
lda &loc
lcla &i
&i seta &n
.rotlloop2
; One step: the low word's bit 15 enters the bottom of the high word, and
; the high word's bit 15 enters the bottom of the low word
asl a ;to set carry
rol &loc+2
rol &loc
&i seta &i-1
aif &i>0,.rotlloop2
ago .end2
.dorotr2
ROTR4 &loc,32-&n
.end2
mend
* &to := &from ROTR4 &n
* Copies the 32-bit value at &from (low word first) to &to and right-rotates
* the copy by &n positions; &from is left unchanged. &n is an assembly-time
* constant and the rotate is unrolled. For &n > 16 a left rotate by 32-&n
* is generated instead. At least one rotate step is always emitted.
* Overwrites A.
macro
ROTR4MOVE &to,&from,&n
aif &n>16,.dorotl3
lda &from
sta &to
; The high word is copied last so that A still holds it for the LSR below
lda &from+2
sta &to+2
lcla &i
&i seta &n
.rotrloop3
; One step: A supplies the high word's next low bit through the carry
lsr a ;to set carry
ror &to
ror &to+2
&i seta &i-1
aif &i>0,.rotrloop3
ago .end3
.dorotl3
ROTL4MOVE &to,&from,32-&n
.end3
mend
* &to := &from ROTL4 &n
* Copies the 32-bit value at &from (low word first) to &to and left-rotates
* the copy by &n positions; &from is left unchanged. &n is an assembly-time
* constant and the rotate is unrolled. For &n > 16 a right rotate by 32-&n
* is generated instead. At least one rotate step is always emitted.
* Overwrites A.
macro
ROTL4MOVE &to,&from,&n
aif &n>16,.dorotr4
lda &from+2
sta &to+2
; The low word is copied last so that A still holds it for the ASL below
lda &from
sta &to
lcla &i
&i seta &n
.rotlloop4
; One step: A supplies the low word's next top bit through the carry
asl a ;to set carry
rol &to+2
rol &to
&i seta &i-1
aif &i>0,.rotlloop4
ago .end4
.dorotr4
ROTR4MOVE &to,&from,32-&n
.end4
mend
* This makes a function wrapper that is callable from C,
* taking a pointer to the context structure as its argument.
*
* The wrapper removes the 4-byte pointer argument from the stack, installs
* its low 16 bits as the direct page register, calls &fn with JSL, restores
* the caller's direct page and returns with RTL. The data bank register is
* the same on exit as on entry.
*
* NOTE(review): this assumes 16-bit index registers and that, on entry, the
* stack holds the 3-byte return address with the 4-byte pointer argument
* directly beneath it - confirm against the C compiler's calling convention.
* Only the low 16 bits of the pointer are used, so the context has to be
* addressable as a direct page.
macro
CFunction &fn
; Push B so that the two 16-bit pulls below lift exactly four bytes:
; the saved B plus the 3-byte return address
phb
plx
ply
; A = caller's direct page; then pull the low word of the argument into D
tdc
pld
; Discard the upper two bytes of the pointer argument (B is restored below)
plb
plb
; Put the return address and the saved B back, then restore B
phy
phx
plb
; Save the caller's direct page across the call
pha
jsl &fn
pld
rtl
mend
* Macros to operate on elements of the message schedule (W)
*
* lda_w &i,&inc: LDA from element (&i mod 16) of w, at byte offset &inc
* within the 4-byte element (0 = low word, 2 = high word). &i is an
* assembly-time expression; it is reduced by repeated subtraction of 16, so
* it is assumed not to be negative. &inc may be omitted.
macro
&lab lda_w &i,&inc
lcla &j
&j seta &i
.modloop1
aif &j<16,.goodidx1
&j seta &j-16
ago .modloop1
.goodidx1
; If no &inc operand was given, declare &inc as a local arithmetic symbol
; (presumably starting at 0 - confirm) so the expression below is valid
aif C:&inc<>0,.haveinc1
lcla &inc
.haveinc1
&lab lda w+(&j)*4+&inc
mend
* eor_w &i,&inc: EOR A with element (&i mod 16) of w, at byte offset &inc
* within the 4-byte element (0 = low word, 2 = high word). &i is reduced by
* repeated subtraction of 16, so it is assumed not to be negative.
* &inc may be omitted.
macro
&lab eor_w &i,&inc
lcla &j
&j seta &i
.modloop2
aif &j<16,.goodidx2
&j seta &j-16
ago .modloop2
.goodidx2
; If no &inc operand was given, declare &inc as a local arithmetic symbol
; (presumably starting at 0 - confirm)
aif C:&inc<>0,.haveinc2
lcla &inc
.haveinc2
&lab eor w+(&j)*4+&inc
mend
* sta_w &i,&inc: STA to element (&i mod 16) of w, at byte offset &inc
* within the 4-byte element (0 = low word, 2 = high word). &i is reduced by
* repeated subtraction of 16, so it is assumed not to be negative.
* &inc may be omitted.
macro
&lab sta_w &i,&inc
lcla &j
&j seta &i
.modloop3
aif &j<16,.goodidx3
&j seta &j-16
ago .modloop3
.goodidx3
; If no &inc operand was given, declare &inc as a local arithmetic symbol
; (presumably starting at 0 - confirm)
aif C:&inc<>0,.haveinc3
lcla &inc
.haveinc3
&lab sta w+(&j)*4+&inc
mend
* adc_w &i,&inc: ADC element (&i mod 16) of w, at byte offset &inc within
* the 4-byte element (0 = low word, 2 = high word), into A. &i is reduced
* by repeated subtraction of 16, so it is assumed not to be negative.
* &inc may be omitted.
macro
&lab adc_w &i,&inc
lcla &j
&j seta &i
.modloop4
aif &j<16,.goodidx4
&j seta &j-16
ago .modloop4
.goodidx4
; If no &inc operand was given, declare &inc as a local arithmetic symbol
; (presumably starting at 0 - confirm)
aif C:&inc<>0,.haveinc4
lcla &inc
.haveinc4
&lab adc w+(&j)*4+&inc
mend
* ROTR4MOVE_w &to,&from,&n: &to := W[&from mod 16] ROTR4 &n
* Same as ROTR4MOVE, except that the source is given as an element index
* into w rather than as an address. &from is reduced by repeated
* subtraction of 16, so it is assumed not to be negative. The element of w
* is left unchanged. Overwrites A.
macro
ROTR4MOVE_w &to,&from,&n
lcla &j
&j seta &from
.modloop5
aif &j<16,.goodidx5
&j seta &j-16
ago .modloop5
.goodidx5
aif &n>16,.dorotl4
lda w+(&j)*4
sta &to
; The high word is copied last so that A still holds it for the LSR below
lda w+(&j)*4+2
sta &to+2
lcla &i
&i seta &n
.rotrloop4
lsr a ;to set carry
ror &to
ror &to+2
&i seta &i-1
aif &i>0,.rotrloop4
ago .end4
.dorotl4
; For &n > 16, a left rotate by 32-&n needs fewer steps
ROTL4MOVE &to,w+(&j)*4,32-&n
.end4
mend
* Compute one part of the message schedule (16 elements)
*
* &part = 1: reverse the byte order of each of the 16 32-bit words of w in
*   place (the words are then held low word first). Overwrites A and X.
* &part > 1: replace each of the 16 elements of w, in order, with the
*   schedule word 16 positions further on:
*     W_i = sigma1(W_(i-2)) + W_(i-7) + sigma0(W_(i-15)) + W_(i-16)
*   with sigma0(x) = ROTR7(x) xor ROTR18(x) xor SHR3(x)
*   and  sigma1(x) = ROTR17(x) xor ROTR19(x) xor SHR10(x).
*   All element indices are taken modulo 16, so w acts as a circular buffer.
*   Overwrites A, X, Y and temp1..temp4.
* The code is fully unrolled at assembly time.
macro
ComputeSchedule &part
lcla &i
; Flip the endianness of W_0 to W_15 (the current block of the message)
aif &part<>1,.skippart1
.loop1
; XBA swaps the bytes within each 16-bit half; the two halves also trade
; places (X carries the old upper half across the first store)
lda w+&i*4
xba
ldx w+&i*4+2
sta w+&i*4+2
txa
xba
sta w+&i*4
&i seta &i+1
aif &i<16,.loop1
ago .end
.skippart1
; compute the rest of the message schedule (W_16 to W_63)
&i seta (&part-1)*16
.loop2
; temp1 := sigma0(W_(i-15))
ROTR4MOVE_w temp1,&i-15,7
ROTR4MOVE_w temp2,&i-15,18
ROTR4MOVE_w temp3,&i-15,3
lda temp3
eor temp2
eor temp1
sta temp1
; Clearing the top 3 bits of the rotated value turns ROTR 3 into SHR 3
lda temp3+2
and #$1FFF
eor temp2+2
eor temp1+2
sta temp1+2
; temp2 := sigma1(W_(i-2))
ROTR4MOVE_w temp2,&i-2,17
ROTR4MOVE_w temp3,&i-2,19
ROTR4MOVE_w temp4,&i-2,10
lda temp4
eor temp3
eor temp2
sta temp2
; Clearing the top 10 bits of the rotated value turns ROTR 10 into SHR 10
lda temp4+2
and #$003F
eor temp3+2
eor temp2+2
sta temp2+2
; Y (low word) and X (high word) := W_(i-16) + W_(i-7)
clc
lda_w &i-16
adc_w &i-7
tay
lda_w &i-16,2
adc_w &i-7,2
tax
; ... + sigma0 term
clc
tya
adc temp1
tay
txa
adc temp1+2
tax
; ... + sigma1 term, stored as the new W_i (in the slot of W_(i-16))
clc
tya
adc temp2
sta_w &i
txa
adc temp2+2
sta_w &i,2
&i seta &i+1
aif &i<&part*16,.loop2
.end
mend
* One iteration of the loop for processing blocks.
* The a,b,c,d,e,f,g,h variables are given as parameters so we can avoid
* cycling them.
*
* &a..&h are the direct-page locations that currently play the roles of the
* working variables a..h. One round computes
*   T1 = h + Sigma1(e) + Ch(e,f,g) + K_t + W_t
*   T2 = Sigma0(a) + Maj(a,b,c)
* and then stores d + T1 (the new e) into &d and T1 + T2 (the new a) into
* &h. The other locations are not written, so the caller shifts every role
* by one position for the next round.
* &iter (0..7) selects the word of w used, relative to the byte offset in
* idx. k_ptr is advanced by 4 bytes to the next round constant.
* Overwrites A, X, Y and temp1..temp4.
*
* NOTE(review): (k_ptr) is a 16-bit direct-page-indirect access, so the
* constant presumably has to be reachable through the current data bank
* register - confirm.
macro
BlockLoopIter &a,&b,&c,&d,&e,&f,&g,&h,&iter
; Sigma_1 computation
; temp1 := ROTR6(e) xor ROTR11(e) xor ROTR25(e)
ROTR4MOVE temp1,&e,6
ROTR4MOVE temp2,&e,11
ROTR4MOVE temp3,&e,25
lda temp1
eor temp2
eor temp3
sta temp1
lda temp1+2
eor temp2+2
eor temp3+2
sta temp1+2
; ch computation
; temp2 := ((f xor g) and e) xor g, which equals (e and f) xor (not e and g)
lda &f
eor &g
and &e
eor &g
sta temp2
lda &f+2
eor &g+2
and &e+2
eor &g+2
sta temp2+2
; T_1 computation
; The running sum is kept in Y (low word) and X (high word)
clc
ldx idx
lda w+&iter*4,x
adc &h
tay
lda w+&iter*4+2,x
adc &h+2
tax
clc
tya
adc temp1
tay
txa
adc temp1+2
tax
clc
tya
adc temp2
tay
txa
adc temp2+2
tax
clc
tya
adc (k_ptr)
sta temp1
txa
; INC does not change the carry, so the carry from the low-word add above
; is still pending for the high-word add below
inc k_ptr
inc k_ptr
adc (k_ptr)
sta temp1+2
inc k_ptr
inc k_ptr
;Sigma_0 computation
; temp2 := ROTR2(a) xor ROTR13(a) xor ROTR22(a)
ROTR4MOVE temp2,&a,2
ROTR4MOVE temp3,&a,13
ROTR4MOVE temp4,&a,22
lda temp2
eor temp3
eor temp4
sta temp2
lda temp2+2
eor temp3+2
eor temp4+2
sta temp2+2
;maj computation
; temp3 := ((a or b) and c) or (a and b), the bitwise majority of a, b, c
lda &a
ora &b
and &c
sta temp3
lda &a
and &b
ora temp3
sta temp3
lda &a+2
ora &b+2
and &c+2
sta temp3+2
lda &a+2
and &b+2
ora temp3+2
sta temp3+2
;T_2 computation
; temp2 := Sigma0 + maj
clc
lda temp2
adc temp3
sta temp2
lda temp2+2
adc temp3+2
sta temp2+2
; &d := d + T1 (this location becomes e for the next round)
clc
lda &d
adc temp1
sta &d
lda &d+2
adc temp1+2
sta &d+2
; &h := T1 + T2 (this location becomes a for the next round)
clc
lda temp1
adc temp2
sta &h
lda temp1+2
adc temp2+2
sta &h+2
mend
* One part of the loop for processing blocks (16 iterations)
*
* Generates eight unrolled rounds inside a loop that runs twice, with idx
* equal to 0 and then 32, so all 16 words currently held in w are consumed.
* Each round is given the working-variable locations shifted by one
* position; after eight rounds they are back in their original roles.
* &part only serves to make the loop labels unique. k_ptr is not reset
* here, so successive parts continue through the constant table.
macro
BlockLoopPart &part
stz idx
loop&part anop
BlockLoopIter a_,b,c,d,e,f,g,h,0
BlockLoopIter h,a_,b,c,d,e,f,g,1
BlockLoopIter g,h,a_,b,c,d,e,f,2
BlockLoopIter f,g,h,a_,b,c,d,e,3
BlockLoopIter e,f,g,h,a_,b,c,d,4
BlockLoopIter d,e,f,g,h,a_,b,c,5
BlockLoopIter c,d,e,f,g,h,a_,b,6
BlockLoopIter b,c,d,e,f,g,h,a_,7
; Advance idx by eight 4-byte words; stop once all 16 words have been used
clc
lda idx
adc #4*8
cmp #16*4
bge endloop&part
sta idx
jmp loop&part
endloop&part anop
mend

107
sha256test.c Normal file
View File

@ -0,0 +1,107 @@
/*
* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include "sha256.h"
#include <stdio.h>
#include <MiscTool.h>
#include <Memory.h>
#include <orca.h>
#include <string.h>
/*
 * Test driver for the SHA-256 block function.
 *
 * Hashes the single pre-padded block for the message "abc", prints the
 * working variables and the resulting hash words, and then times 1000
 * further calls to sha256_processblock.
 *
 * Returns 0 on success, or 1 if memory for the context cannot be allocated.
 */
int main(int argc, char **argv) {
    unsigned int i;
    unsigned long tick_count;
    long double bytes_per_sec;
    struct sha256_context *context, **context_hndl;
    /* The block is "abc", the 0x80 padding byte, zero fill, and the message
       length in bits (0x18 = 24) in the final bytes. */
    struct sha256_context context_init = {0,0,0,0, {0}, {0}, {0},
        {0x61,0x62,0x63,0x80,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x00,
         0x00,0x00,0x00,0x18
        },
        {0}};

    /* The context must be in bank 0 (see sha256.h), so it is requested from
       the Memory Manager as a fixed, page-aligned block at bank 0 instead
       of being placed on the stack or in ordinary globals. */
    context_hndl = (struct sha256_context **)NewHandle(sizeof(struct sha256_context),
        userid(), attrFixed|attrPage|attrBank|attrNoCross, 0x000000);
    if (toolerror()) {
        fputs("Unable to allocate bank 0 memory for the SHA-256 context\n", stderr);
        return 1;
    }

    context = *context_hndl;
    *context = context_init;

    sha256_init(context);
    sha256_processblock(context);

    printf("a...h = ");
    for (i = 0; i < 8; i++) {
        printf("%08lx ", context->vars[i]);
    }
    printf("\n");

    /* Each hash word is stored least significant byte first, so the bytes
       of every 4-byte group are printed in reverse order. */
    printf("h[..] = %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x "
        "%02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
        context->hash[3], context->hash[2], context->hash[1], context->hash[0],
        context->hash[7], context->hash[6], context->hash[5], context->hash[4],
        context->hash[11], context->hash[10], context->hash[9], context->hash[8],
        context->hash[15], context->hash[14], context->hash[13], context->hash[12],
        context->hash[19], context->hash[18], context->hash[17], context->hash[16],
        context->hash[23], context->hash[22], context->hash[21], context->hash[20],
        context->hash[27], context->hash[26], context->hash[25], context->hash[24],
        context->hash[31], context->hash[30], context->hash[29], context->hash[28]);

    /* Timing: GetTick counts 60ths of a second; each call handles 64 bytes. */
    tick_count = GetTick();
    for (i = 0; i < 1000; i++) {
        sha256_processblock(context);
    }
    tick_count = GetTick() - tick_count;

    bytes_per_sec = (long double)1000 * 64 * 60 / tick_count;
    /* bytes_per_sec is a long double, so the conversion needs the L modifier */
    printf("Time for 1000 iters = %lu ticks (%Lf bytes/sec)\n", tick_count, bytes_per_sec);

    /* Disabled: relies on sha256_update and sha256_finalize. */
    /*
    tick_count = GetTick();
    sha256_init(context);
    sha256_update(context, (void*)0x030000, 64000);
    sha256_finalize(context);
    tick_count = GetTick() - tick_count;
    bytes_per_sec = (long double)1000 * 64 * 60 / tick_count;
    printf("Append time = %lu ticks (%lf bytes/sec)\n", tick_count, bytes_per_sec);
    if (argc > 1) {
    sha256_init(context);
    sha256_update(context, argv[1], strlen(argv[1]));
    sha256_finalize(context);
    printf("h[..] = %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
    context->hash[0], context->hash[1], context->hash[2], context->hash[3],
    context->hash[4], context->hash[5], context->hash[6], context->hash[7],
    context->hash[8], context->hash[9], context->hash[10], context->hash[11],
    context->hash[12], context->hash[13], context->hash[14], context->hash[15],
    context->hash[16], context->hash[17], context->hash[18], context->hash[19]);
    }
    */

    /* Release the context memory obtained from NewHandle above. */
    DisposeHandle((Handle)context_hndl);
    return 0;
}