From 91c107eb981030e91c15ace7b24a596cf7deb226 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Mon, 3 Jul 2017 14:27:19 -0500 Subject: [PATCH] Add initial implementation of the basic block-processing function for SHA-256. --- sha256.asm | 253 +++++++++++++++++++++++++++++ sha256.h | 53 ++++++ sha256.macros | 437 ++++++++++++++++++++++++++++++++++++++++++++++++++ sha256test.c | 107 ++++++++++++ 4 files changed, 850 insertions(+) create mode 100644 sha256.asm create mode 100644 sha256.h create mode 100644 sha256.macros create mode 100644 sha256test.c diff --git a/sha256.asm b/sha256.asm new file mode 100644 index 0000000..0feb596 --- /dev/null +++ b/sha256.asm @@ -0,0 +1,253 @@ +* Copyright (c) 2017 Stephen Heumann +* +* Permission to use, copy, modify, and distribute this software for any +* purpose with or without fee is hereby granted, provided that the above +* copyright notice and this permission notice appear in all copies. +* +* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES +* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF +* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR +* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES +* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN +* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF +* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + +* Implementation of the SHA-256 (and SHA-224) hash function for the 65816 +* +* The basic structure of the hash computation is described in FIPS PUB 180-4, +* although this implementation rearranges some things for better performance. 
+
+         case  on
+         mcopy sha256.macros
+
+* Direct page locations
+* These are byte offsets from the direct page register.  The CFunction
+* wrapper macro loads D from the context pointer, so the offsets parallel
+* the fields of struct sha256_context in sha256.h.
+length   gequ  0                        ; 8 bytes; zeroed by SHA256_INIT
+extra    gequ  8                        ; 2 bytes; zeroed by SHA256_INIT
+idx      gequ  10                       ; byte offset into w used by BlockLoopPart
+a_       gequ  12                       ; elements of state
+b        gequ  16
+c        gequ  20
+d        gequ  24
+e        gequ  28
+f        gequ  32
+g        gequ  36
+h        gequ  40
+temp1    gequ  44                       ; 32-bit scratch
+temp2    gequ  48                       ; 32-bit scratch
+ch       gequ  52                       ; not referenced by the code shown in this patch
+maj      gequ  56                       ; not referenced by the code shown in this patch
+h0       gequ  60                       ; h0..h7: running hash value (32 bits each)
+h1       gequ  64
+h2       gequ  68
+h3       gequ  72
+h4       gequ  76
+h5       gequ  80
+h6       gequ  84
+h7       gequ  88
+w        gequ  92                       ; 64 bytes: message block / 16-word schedule buffer
+temp3    gequ  156                      ; 32-bit scratch
+temp4    gequ  160                      ; 32-bit scratch
+k_ptr    gequ  164                      ; 16-bit pointer to the next constant in k
+
+
+* The 64 32-bit round constants (K) of SHA-256, in round order.
+k        private
+         dc    i4'$428a2f98, $71374491, $b5c0fbcf, $e9b5dba5'
+         dc    i4'$3956c25b, $59f111f1, $923f82a4, $ab1c5ed5'
+         dc    i4'$d807aa98, $12835b01, $243185be, $550c7dc3'
+         dc    i4'$72be5d74, $80deb1fe, $9bdc06a7, $c19bf174'
+         dc    i4'$e49b69c1, $efbe4786, $0fc19dc6, $240ca1cc'
+         dc    i4'$2de92c6f, $4a7484aa, $5cb0a9dc, $76f988da'
+         dc    i4'$983e5152, $a831c66d, $b00327c8, $bf597fc7'
+         dc    i4'$c6e00bf3, $d5a79147, $06ca6351, $14292967'
+         dc    i4'$27b70a85, $2e1b2138, $4d2c6dfc, $53380d13'
+         dc    i4'$650a7354, $766a0abb, $81c2c92e, $92722c85'
+         dc    i4'$a2bfe8a1, $a81a664b, $c24b8b70, $c76c51a3'
+         dc    i4'$d192e819, $d6990624, $f40e3585, $106aa070'
+         dc    i4'$19a4c116, $1e376c08, $2748774c, $34b0bcb5'
+         dc    i4'$391c0cb3, $4ed8aa4a, $5b9cca4f, $682e6ff3'
+         dc    i4'$748f82ee, $78a5636f, $84c87814, $8cc70208'
+         dc    i4'$90befffa, $a4506ceb, $bef9a3f7, $c67178f2'
+         end
+
+* Initialize a SHA-256 context.
+* This must be called before any of the other SHA-256 functions.
+* C-callable entry point: CFunction points the direct page at the context
+* and then calls SHA256_INIT.
+sha256_init start
+         CFunction SHA256_INIT
+         end
+
+* Store the SHA-256 initial hash value ($6a09e667, $bb67ae85, ...) into
+* h0..h7, low word first, and zero the length and extra fields.
+* Runs with the direct page already set to the context; returns with rtl.
+SHA256_INIT start
+         lda   #$e667                   ; h0 = $6a09e667
+         sta   h0
+         lda   #$6a09
+         sta   h0+2
+         lda   #$ae85                   ; h1 = $bb67ae85
+         sta   h1
+         lda   #$bb67
+         sta   h1+2
+         lda   #$f372                   ; h2 = $3c6ef372
+         sta   h2
+         lda   #$3c6e
+         sta   h2+2
+         lda   #$f53a                   ; h3 = $a54ff53a
+         sta   h3
+         lda   #$a54f
+         sta   h3+2
+         lda   #$527f                   ; h4 = $510e527f
+         sta   h4
+         lda   #$510e
+         sta   h4+2
+         lda   #$688c                   ; h5 = $9b05688c
+         sta   h5
+         lda   #$9b05
+         sta   h5+2
+         lda   #$d9ab                   ; h6 = $1f83d9ab
+         sta   h6
+         lda   #$1f83
+         sta   h6+2
+         lda   #$cd19                   ; h7 = $5be0cd19
+         sta   h7
+         lda   #$5be0
+         sta   h7+2
+
+         stz   length                   ; clear all 8 bytes of length
+         stz   length+2
+         stz   length+4
+         stz   length+6
+         stz   extra
+         rtl
+         end
+
+
+* Process one 64-byte block through the SHA-256 hashing function.
+* This is a low-level function; users should normally not call this directly.
+* C-callable entry point: CFunction points the direct page at the context
+* and then calls SHA256_PROCESSBLOCK.
+sha256_processblock start
+         CFunction SHA256_PROCESSBLOCK
+         end
+
+* Run the 64 rounds of the compression function on the block held in w and
+* add the result into h0..h7.
+*
+* Runs with the direct page already set to the context; returns with rtl.
+* The block in w is modified: ComputeSchedule 1 byte-swaps it in place and
+* the later schedule parts overwrite it (w is reused as a 16-word buffer).
+* a_..h, temp1..temp4, idx and k_ptr are used as work space.
+*
+* NOTE(review): the round macro reads the constants with "adc (k_ptr)",
+* i.e. through the data bank register, and only the 16-bit address of k is
+* stored in k_ptr -- confirm k always lies in the data bank that is current
+* when this routine runs.
+SHA256_PROCESSBLOCK start
+* Load the working variables a..h from the current hash value h0..h7.
+         lda   h0
+         sta   a_
+         lda   h0+2
+         sta   a_+2
+
+         lda   h1
+         sta   b
+         lda   h1+2
+         sta   b+2
+
+         lda   h2
+         sta   c
+         lda   h2+2
+         sta   c+2
+
+         lda   h3
+         sta   d
+         lda   h3+2
+         sta   d+2
+
+         lda   h4
+         sta   e
+         lda   h4+2
+         sta   e+2
+
+         lda   h5
+         sta   f
+         lda   h5+2
+         sta   f+2
+
+         lda   h6
+         sta   g
+         lda   h6+2
+         sta   g+2
+
+         lda   h7
+         sta   h
+         lda   h7+2
+         sta   h+2
+
+* Four groups of 16 rounds.  k_ptr walks through the constant table; the
+* schedule for each later group is produced by the shared subroutine below
+* so that the large ComputeSchedule 2 expansion is assembled only once.
+         lda   #k
+         sta   k_ptr
+         ComputeSchedule 1
+         BlockLoopPart 1
+         jsr   ComputeScheduleSub
+         BlockLoopPart 2
+         jsr   ComputeScheduleSub
+         BlockLoopPart 3
+         jsr   ComputeScheduleSub
+         BlockLoopPart 4
+
+* Add the working variables back into the hash value (32-bit adds done as
+* two 16-bit adds chained through the carry).
+endloop  clc
+         lda   h0
+         adc   a_
+         sta   h0
+         lda   h0+2
+         adc   a_+2
+         sta   h0+2
+
+         clc
+         lda   h1
+         adc   b
+         sta   h1
+         lda   h1+2
+         adc   b+2
+         sta   h1+2
+
+         clc
+         lda   h2
+         adc   c
+         sta   h2
+         lda   h2+2
+         adc   c+2
+         sta   h2+2
+
+         clc
+         lda   h3
+         adc   d
+         sta   h3
+         lda   h3+2
+         adc   d+2
+         sta   h3+2
+
+         clc
+         lda   h4
+         adc   e
+         sta   h4
+         lda   h4+2
+         adc   e+2
+         sta   h4+2
+
+         clc
+         lda   h5
+         adc   f
+         sta   h5
+         lda   h5+2
+         adc   f+2
+         sta   h5+2
+
+         clc
+         lda   h6
+         adc   g
+         sta   h6
+         lda   h6+2
+         adc   g+2
+         sta   h6+2
+
+         clc
+         lda   h7
+         adc   h
+         sta   h7
+         lda   h7+2
+         adc   h+2
+         sta   h7+2
+         rtl
+
+* Local subroutine (called with jsr, returns with rts): compute the next
+* 16 schedule words in place in w.
+ComputeScheduleSub anop
+         ComputeSchedule 2
+         rts
+         end
diff --git a/sha256.h b/sha256.h
new file mode 100644
index 0000000..b035cc8
--- /dev/null
+++ b/sha256.h
@@ -0,0 +1,53 @@
+/*
+ * Copyright (c) 2017 Stephen Heumann
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#ifndef SHA256_H
+#define SHA256_H
+
+/*
+ * SHA-256 context.
+ *
+ * The assembly code addresses this structure through the direct page, so
+ * the field offsets must stay in step with the "Direct page locations"
+ * equates in sha256.asm (offsets are given in the comments below).
+ * Multi-byte values are stored by 16-bit stores, low word first.
+ */
+struct sha256_context {
+    unsigned long length;            /*   0: low half of the 8-byte length field */
+    unsigned long length2;           /*   4: high half of the length field */
+    unsigned short extra;            /*   8 */
+    unsigned short idx;              /*  10: work offset into block[] */
+    unsigned long vars[8];           /*  12: working variables a..h */
+    unsigned char reserved1[16];     /*  44: scratch (temp1, temp2, ch, maj) */
+    unsigned char hash[32];          /*  60: hash value h0..h7, 4 bytes each */
+    unsigned char block[64];         /*  92: current block; overwritten by
+                                             sha256_processblock */
+    unsigned char reserved2[16];     /* 156: scratch (temp3, temp4, k_ptr) */
+};
+
+/*
+ * The context structure must be in bank 0, preferably page-aligned.
+ */
+
+/*
+ * Initialize a SHA-256 context.
+ * This must be called before any of the other SHA-256 functions.
+ */
+void sha256_init(struct sha256_context *context);
+
+/*
+ * Update a SHA-256 context based on the specified data.
+ */
+void sha256_update(struct sha256_context *context, const unsigned char *data, unsigned long length);
+
+/*
+ * Finish SHA-256 processing and generate the final hash code.
+ */
+void sha256_finalize(struct sha256_context *context);
+
+/*
+ * Process one 64-byte block through the SHA-256 hashing function.
+ * This is a low-level function; users should normally not call this directly.
+ *
+ * The block is taken from context->block, which is modified in the process.
+ */
+void sha256_processblock(struct sha256_context *context);
+
+#endif /* SHA256_H */
diff --git a/sha256.macros b/sha256.macros
new file mode 100644
index 0000000..878c66f
--- /dev/null
+++ b/sha256.macros
@@ -0,0 +1,437 @@
+* Copyright (c) 2017 Stephen Heumann
+*
+* Permission to use, copy, modify, and distribute this software for any
+* purpose with or without fee is hereby granted, provided that the above
+* copyright notice and this permission notice appear in all copies.
+*
+* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+* MERCHANTABILITY AND FITNESS.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+
+
+* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
+*
+* The value is held as two 16-bit words, low word at &loc, high word at
+* &loc+2.  For &n <= 16 the rotate is unrolled &n times at assembly time:
+* A starts as a copy of the high word, "lsr a" moves its current low bit
+* (the bit that must cross into the low word) into the carry, and the two
+* "ror" instructions rotate the low and then the high word through the carry.
+* For &n > 16 the equivalent left rotate by 32-&n is generated instead.
+* The unrolled body is always emitted at least once, so &n = 0 would
+* rotate by one position.  Changes A and the carry.
+         macro
+         ROTR4 &loc,&n
+         aif   &n>16,.dorotl
+         lda   &loc+2
+         lcla  &i
+&i       seta  &n
+.rotrloop
+         lsr   a                        ;to set carry
+         ror   &loc
+         ror   &loc+2
+&i       seta  &i-1
+         aif   &i>0,.rotrloop
+         ago   .end
+.dorotl
+         ROTL4 &loc,32-&n
+.end
+         mend
+
+* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
+*
+* Mirror image of ROTR4: A starts as a copy of the low word, "asl a" moves
+* its current top bit into the carry, and the two "rol" instructions rotate
+* the high and then the low word through the carry.  For &n > 16 the
+* equivalent right rotate by 32-&n is generated instead.
+* Changes A and the carry.
+         macro
+         ROTL4 &loc,&n
+         aif   &n>16,.dorotr2
+         lda   &loc
+         lcla  &i
+&i       seta  &n
+.rotlloop2
+         asl   a                        ;to set carry
+         rol   &loc+2
+         rol   &loc
+&i       seta  &i-1
+         aif   &i>0,.rotlloop2
+         ago   .end2
+.dorotr2
+         ROTR4 &loc,32-&n
+.end2
+         mend
+
+* &to := &from ROTR4 &n
+*
+* Copies the 32-bit value at &from to &to and rotates the copy right by
+* &n positions; &from is not written.  The copy is ordered so that A ends
+* up holding the high word, as the rotate sequence (see ROTR4) requires.
+* For &n > 16 ROTL4MOVE by 32-&n is generated instead.
+* Changes A and the carry.
+         macro
+         ROTR4MOVE &to,&from,&n
+         aif   &n>16,.dorotl3
+         lda   &from
+         sta   &to
+         lda   &from+2
+         sta   &to+2
+         lcla  &i
+&i       seta  &n
+.rotrloop3
+         lsr   a                        ;to set carry
+         ror   &to
+         ror   &to+2
+&i       seta  &i-1
+         aif   &i>0,.rotrloop3
+         ago   .end3
+.dorotl3
+         ROTL4MOVE &to,&from,32-&n
+.end3
+         mend
+
+* &to := &from ROTL4 &n
+*
+* Copies the 32-bit value at &from to &to and rotates the copy left by
+* &n positions; &from is not written.  The copy is ordered so that A ends
+* up holding the low word, as the rotate sequence (see ROTL4) requires.
+* For &n > 16 ROTR4MOVE by 32-&n is generated instead.
+* Changes A and the carry.
+         macro
+         ROTL4MOVE &to,&from,&n
+         aif   &n>16,.dorotr4
+         lda   &from+2
+         sta   &to+2
+         lda   &from
+         sta   &to
+         lcla  &i
+&i       seta  &n
+.rotlloop4
+         asl   a                        ;to set carry
+         rol   &to+2
+         rol   &to
+&i       seta  &i-1
+         aif   &i>0,.rotlloop4
+         ago   .end4
+.dorotr4
+         ROTR4MOVE &to,&from,32-&n
+.end4
+         mend
+
+
+* This makes a function wrapper that is callable from C,
+* taking a pointer to the context structure as its argument.
+* On entry the stack holds the 3-byte return address with the 4-byte
+* pointer argument above it.  The wrapper removes the argument, sets the
+* direct page register to the low 16 bits of the pointer, calls &fn with
+* jsl, then restores the caller's direct page and returns with rtl.
+* The data bank register is the same on the call to &fn as on entry.
+         macro
+         CFunction &fn
+         phb                            ; save B on top of the return address
+         plx                            ; X = B + low byte of return address
+         ply                            ; Y = upper two bytes of return address
+         tdc                            ; A = caller's direct page
+         pld                            ; D = low word of the pointer argument
+         plb                            ; discard the high word of the argument
+         plb
+         phy                            ; put the return address (and B) back
+         phx
+         plb                            ; restore B
+         pha                            ; save caller's direct page
+         jsl   &fn
+         pld                            ; restore caller's direct page
+         rtl
+         mend
+
+
+* Macros to operate on elements of the message schedule (W)
+*
+* Each of lda_w / eor_w / sta_w / adc_w generates the named instruction on
+* the word at w+(&i mod 16)*4+&inc.  &i is reduced at assembly time by
+* repeatedly subtracting 16, so w acts as a 16-entry circular buffer
+* (negative indices are not adjusted).  &inc is an optional byte offset
+* that defaults to 0; pass 2 to address the high word.
+         macro
+&lab     lda_w &i,&inc
+         lcla  &j
+&j       seta  &i
+.modloop1
+         aif   &j<16,.goodidx1
+&j       seta  &j-16
+         ago   .modloop1
+.goodidx1
+         aif   C:&inc<>0,.haveinc1
+         lcla  &inc
+.haveinc1
+&lab     lda   w+(&j)*4+&inc
+         mend
+
+         macro
+&lab     eor_w &i,&inc
+         lcla  &j
+&j       seta  &i
+.modloop2
+         aif   &j<16,.goodidx2
+&j       seta  &j-16
+         ago   .modloop2
+.goodidx2
+         aif   C:&inc<>0,.haveinc2
+         lcla  &inc
+.haveinc2
+&lab     eor   w+(&j)*4+&inc
+         mend
+
+         macro
+&lab     sta_w &i,&inc
+         lcla  &j
+&j       seta  &i
+.modloop3
+         aif   &j<16,.goodidx3
+&j       seta  &j-16
+         ago   .modloop3
+.goodidx3
+         aif   C:&inc<>0,.haveinc3
+         lcla  &inc
+.haveinc3
+&lab     sta   w+(&j)*4+&inc
+         mend
+
+         macro
+&lab     adc_w &i,&inc
+         lcla  &j
+&j       seta  &i
+.modloop4
+         aif   &j<16,.goodidx4
+&j       seta  &j-16
+         ago   .modloop4
+.goodidx4
+         aif   C:&inc<>0,.haveinc4
+         lcla  &inc
+.haveinc4
+&lab     adc   w+(&j)*4+&inc
+         mend
+
+* &to := W[&from mod 16] ROTR4 &n
+* Same code as ROTR4MOVE, with the source taken from the schedule buffer.
+         macro
+         ROTR4MOVE_w &to,&from,&n
+         lcla  &j
+&j       seta  &from
+.modloop5
+         aif   &j<16,.goodidx5
+&j       seta  &j-16
+         ago   .modloop5
+.goodidx5
+         aif   &n>16,.dorotl4
+         lda   w+(&j)*4
+         sta   &to
+         lda   w+(&j)*4+2
+         sta   &to+2
+         lcla  &i
+&i       seta  &n
+.rotrloop4
+         lsr   a                        ;to set carry
+         ror   &to
+         ror   &to+2
+&i       seta  &i-1
+         aif   &i>0,.rotrloop4
+         ago   .end4
+.dorotl4
+         ROTL4MOVE &to,w+(&j)*4,32-&n
+.end4
+         mend
+
+* Compute one part of the message schedule (16 elements)
+*
+* &part = 1: reverse the byte order of each of the 16 32-bit words of the
+*            block in w, in place.
+* &part > 1: for i = (&part-1)*16 .. &part*16-1, fully unrolled, compute
+*              W[i] = s1(W[i-2]) + W[i-7] + s0(W[i-15]) + W[i-16]
+*            where s0(x) = ROTR7(x) xor ROTR18(x) xor SHR3(x)
+*                  s1(x) = ROTR17(x) xor ROTR19(x) xor SHR10(x)
+*            storing each result over the oldest entry of the 16-word buffer.
+* Uses A, X, Y and temp1..temp4.
+         macro
+         ComputeSchedule &part
+         lcla  &i
+
+; Flip the endianness of W_0 to W_15 (the current block of the message)
+; (xba swaps the bytes within each half; the halves are also exchanged)
+         aif   &part<>1,.skippart1
+.loop1
+         lda   w+&i*4
+         xba
+         ldx   w+&i*4+2
+         sta   w+&i*4+2
+         txa
+         xba
+         sta   w+&i*4
+&i       seta  &i+1
+         aif   &i<16,.loop1
+         ago   .end
+.skippart1
+
+; compute the rest of the message schedule (W_16 to W_63)
+&i       seta  (&part-1)*16
+.loop2
+; temp1 := s0(W[i-15]).  The shift right by 3 is formed from a rotate right
+; by 3 with the three wrapped-around top bits masked off in the high word.
+         ROTR4MOVE_w temp1,&i-15,7
+         ROTR4MOVE_w temp2,&i-15,18
+         ROTR4MOVE_w temp3,&i-15,3
+         lda   temp3
+         eor   temp2
+         eor   temp1
+         sta   temp1
+         lda   temp3+2
+         and   #$1FFF
+         eor   temp2+2
+         eor   temp1+2
+         sta   temp1+2
+
+; temp2 := s1(W[i-2]).  The shift right by 10 is formed from a rotate right
+; by 10 with the ten wrapped-around top bits masked off in the high word.
+         ROTR4MOVE_w temp2,&i-2,17
+         ROTR4MOVE_w temp3,&i-2,19
+         ROTR4MOVE_w temp4,&i-2,10
+         lda   temp4
+         eor   temp3
+         eor   temp2
+         sta   temp2
+         lda   temp4+2
+         and   #$003F
+         eor   temp3+2
+         eor   temp2+2
+         sta   temp2+2
+
+; W[i] := W[i-16] + W[i-7] + temp1 + temp2.  The running sum is kept in
+; Y (low word) and X (high word); the transfers between the two halves of
+; each add leave the carry untouched.
+         clc
+         lda_w &i-16
+         adc_w &i-7
+         tay
+         lda_w &i-16,2
+         adc_w &i-7,2
+         tax
+         clc
+         tya
+         adc   temp1
+         tay
+         txa
+         adc   temp1+2
+         tax
+         clc
+         tya
+         adc   temp2
+         sta_w &i
+         txa
+         adc   temp2+2
+         sta_w &i,2
+
+&i       seta  &i+1
+         aif   &i<&part*16,.loop2
+.end
+         mend
+
+
+
+* One iteration of the loop for processing blocks.
+* The a,b,c,d,e,f,g,h variables are given as parameters so we can avoid
+* cycling them.
+*
+* Computes  T1 = &h + Sigma1(&e) + Ch(&e,&f,&g) + K + W
+*           T2 = Sigma0(&a) + Maj(&a,&b,&c)
+* then      &d = &d + T1   and   &h = T1 + T2
+* so the caller obtains the next round by shifting the argument list one
+* place (what was &h plays the role of &a, and so on).
+* W is read from w+&iter*4 indexed by idx; K is read through k_ptr, which
+* is advanced by 4.  Uses A, X, Y and temp1..temp4.
+         macro
+         BlockLoopIter &a,&b,&c,&d,&e,&f,&g,&h,&iter
+
+; Sigma_1 computation
+; temp1 := ROTR6(e) xor ROTR11(e) xor ROTR25(e)
+         ROTR4MOVE temp1,&e,6
+         ROTR4MOVE temp2,&e,11
+         ROTR4MOVE temp3,&e,25
+         lda   temp1
+         eor   temp2
+         eor   temp3
+         sta   temp1
+         lda   temp1+2
+         eor   temp2+2
+         eor   temp3+2
+         sta   temp1+2
+
+; ch computation
+; temp2 := ((f xor g) and e) xor g
+         lda   &f
+         eor   &g
+         and   &e
+         eor   &g
+         sta   temp2
+         lda   &f+2
+         eor   &g+2
+         and   &e+2
+         eor   &g+2
+         sta   temp2+2
+
+; T_1 computation
+; temp1 := W + h + temp1 + temp2 + K.  The running sum is kept in Y (low
+; word) and X (high word); X is only reused after its last use as an index.
+; The two "inc k_ptr" between the halves of the last add do not change the
+; carry.
+         clc
+         ldx   idx
+         lda   w+&iter*4,x
+         adc   &h
+         tay
+         lda   w+&iter*4+2,x
+         adc   &h+2
+         tax
+         clc
+         tya
+         adc   temp1
+         tay
+         txa
+         adc   temp1+2
+         tax
+         clc
+         tya
+         adc   temp2
+         tay
+         txa
+         adc   temp2+2
+         tax
+         clc
+         tya
+         adc   (k_ptr)
+         sta   temp1
+         txa
+         inc   k_ptr
+         inc   k_ptr
+         adc   (k_ptr)
+         sta   temp1+2
+         inc   k_ptr
+         inc   k_ptr
+
+;Sigma_0 computation
+; temp2 := ROTR2(a) xor ROTR13(a) xor ROTR22(a)
+         ROTR4MOVE temp2,&a,2
+         ROTR4MOVE temp3,&a,13
+         ROTR4MOVE temp4,&a,22
+         lda   temp2
+         eor   temp3
+         eor   temp4
+         sta   temp2
+         lda   temp2+2
+         eor   temp3+2
+         eor   temp4+2
+         sta   temp2+2
+
+;maj computation
+; temp3 := ((a or b) and c) or (a and b)
+         lda   &a
+         ora   &b
+         and   &c
+         sta   temp3
+         lda   &a
+         and   &b
+         ora   temp3
+         sta   temp3
+         lda   &a+2
+         ora   &b+2
+         and   &c+2
+         sta   temp3+2
+         lda   &a+2
+         and   &b+2
+         ora   temp3+2
+         sta   temp3+2
+
+;T_2 computation
+; temp2 := temp2 + temp3
+         clc
+         lda   temp2
+         adc   temp3
+         sta   temp2
+         lda   temp2+2
+         adc   temp3+2
+         sta   temp2+2
+
+; d := d + T_1
+         clc
+         lda   &d
+         adc   temp1
+         sta   &d
+         lda   &d+2
+         adc   temp1+2
+         sta   &d+2
+
+; h := T_1 + T_2
+         clc
+         lda   temp1
+         adc   temp2
+         sta   &h
+         lda   temp1+2
+         adc   temp2+2
+         sta   &h+2
+
+         mend
+
+
+* One part of the loop for processing blocks (16 iterations)
+*
+* Eight unrolled rounds are executed twice, with idx = 0 and then idx = 32
+* (bytes), so that the rounds read W[0..7] and then W[8..15].  After eight
+* rounds the argument rotation is back at its starting position.
+* &part is only used to make the loop labels unique.
+         macro
+         BlockLoopPart &part
+
+         stz   idx
+loop&part anop
+
+         BlockLoopIter a_,b,c,d,e,f,g,h,0
+         BlockLoopIter h,a_,b,c,d,e,f,g,1
+         BlockLoopIter g,h,a_,b,c,d,e,f,2
+         BlockLoopIter f,g,h,a_,b,c,d,e,3
+         BlockLoopIter e,f,g,h,a_,b,c,d,4
+         BlockLoopIter d,e,f,g,h,a_,b,c,5
+         BlockLoopIter c,d,e,f,g,h,a_,b,6
+         BlockLoopIter b,c,d,e,f,g,h,a_,7
+
+         clc
+         lda   idx
+         adc   #4*8
+         cmp   #16*4
+         bge   endloop&part
+         sta   idx
+         jmp   loop&part
+endloop&part anop
+         mend
+
diff --git a/sha256test.c b/sha256test.c
new file mode 100644
index 0000000..3be187b
--- /dev/null
+++ b/sha256test.c
@@ -0,0 +1,107 @@
+/*
+ * Copyright (c) 2017 Stephen Heumann
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+/*
+ * NOTE(review): the header names were missing from the five system
+ * #include lines; they are restored here from the identifiers this file
+ * uses (printf, strlen, NewHandle/attr*, GetTick, toolerror/userid).
+ * Confirm the names against the ORCA/C header set.
+ */
+#include "sha256.h"
+#include <stdio.h>
+#include <string.h>
+#include <orca.h>
+#include <memory.h>
+#include <misctool.h>
+
+/*
+ * Test and timing driver for sha256_processblock.
+ *
+ * Hashes the single padded block for the 3-byte message "abc" (0x61 0x62
+ * 0x63, the 0x80 padding byte, and the bit length 0x18 in the last byte),
+ * prints the working variables and the resulting hash, then times 1000
+ * further calls.  The FIPS 180-4 digest for "abc", for comparison with the
+ * "h[..]" line, is
+ *   ba7816bf 8f01cfea 414140de 5dae2223 b00361a3 96177a9c b410ff61 f20015ad
+ *
+ * Returns 0 on success and 1 if the context could not be allocated.
+ */
+int main(int argc, char **argv) {
+    unsigned int i;
+    unsigned long tick_count;
+    long double bytes_per_sec;
+
+    struct sha256_context *context, **context_hndl;
+    struct sha256_context context_init = {0,0,0,0, {0}, {0}, {0},
+                    {0x61,0x62,0x63,0x80,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x00,
+                     0x00,0x00,0x00,0x18
+                    },
+                    {0}};
+
+    /* The context must live in bank 0, page-aligned, and must not move. */
+    context_hndl = (struct sha256_context **)NewHandle(sizeof(struct sha256_context),
+                    userid(), attrFixed|attrPage|attrBank|attrNoCross, 0x000000);
+    if (toolerror()) {
+        fputs("sha256test: unable to allocate context\n", stderr);
+        return 1;
+    }
+    context = *context_hndl;
+    *context = context_init;
+
+    sha256_init(context);
+    sha256_processblock(context);
+
+    printf("a...h = ");
+    for (i = 0; i < 8; i++) {
+        printf("%08lx ", context->vars[i]);
+    }
+    printf("\n");
+
+    /* Each 32-bit hash word is stored low byte first, so the bytes of each
+       word are printed in reverse to show the conventional big-endian form. */
+    printf("h[..] = %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x "
+           "%02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
+           context->hash[3], context->hash[2], context->hash[1], context->hash[0],
+           context->hash[7], context->hash[6], context->hash[5], context->hash[4],
+           context->hash[11], context->hash[10], context->hash[9], context->hash[8],
+           context->hash[15], context->hash[14], context->hash[13], context->hash[12],
+           context->hash[19], context->hash[18], context->hash[17], context->hash[16],
+           context->hash[23], context->hash[22], context->hash[21], context->hash[20],
+           context->hash[27], context->hash[26], context->hash[25], context->hash[24],
+           context->hash[31], context->hash[30], context->hash[29], context->hash[28]);
+
+    /* Timing only: the block buffer was overwritten by the first call, so
+       the hash produced by these calls is not meaningful. */
+    tick_count = GetTick();
+    for (i = 0; i < 1000; i++) {
+        sha256_processblock(context);
+    }
+    tick_count = GetTick() - tick_count;
+
+    /* 1000 blocks of 64 bytes; the computation assumes 60 ticks per second. */
+    bytes_per_sec = (long double)1000 * 64 * 60 / tick_count;
+    printf("Time for 1000 iters = %lu ticks (%Lf bytes/sec)\n", tick_count, bytes_per_sec);
+/*
+    TODO: enable once sha256_update and sha256_finalize are implemented.
+
+    tick_count = GetTick();
+    sha256_init(context);
+    sha256_update(context, (void*)0x030000, 64000);
+    sha256_finalize(context);
+    tick_count = GetTick() - tick_count;
+    bytes_per_sec = (long double)1000 * 64 * 60 / tick_count;
+    printf("Append time = %lu ticks (%Lf bytes/sec)\n", tick_count, bytes_per_sec);
+
+    if (argc > 1) {
+        sha256_init(context);
+        sha256_update(context, argv[1], strlen(argv[1]));
+        sha256_finalize(context);
+
+        printf("h[..] = %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
+               context->hash[0], context->hash[1], context->hash[2], context->hash[3],
+               context->hash[4], context->hash[5], context->hash[6], context->hash[7],
+               context->hash[8], context->hash[9], context->hash[10], context->hash[11],
+               context->hash[12], context->hash[13], context->hash[14], context->hash[15],
+               context->hash[16], context->hash[17], context->hash[18], context->hash[19]);
+    }
+*/
+
+    DisposeHandle((Handle)context_hndl);
+    return 0;
+}