From 7182fc581797f037d85e48e62c69913f8b5b2d74 Mon Sep 17 00:00:00 2001
From: Stephen Heumann
Date: Thu, 29 Jun 2017 20:51:36 -0500
Subject: [PATCH] Use separate loops for the four parts of the SHA-1 computation.

---
 sha1.asm    | 136 +-------------------------------
 sha1.macros | 220 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 224 insertions(+), 132 deletions(-)

diff --git a/sha1.asm b/sha1.asm
index 52e27b5..ab09892 100644
--- a/sha1.asm
+++ b/sha1.asm
@@ -1,6 +1,5 @@
           case  on
           mcopy sha1.macros
-          mcopy rotate.macros
 
 * Direct page locations
 ;chunk    gequ  0 ; 8 bytes
@@ -92,137 +91,10 @@ SHA1_PROCESSCHUNK start
           sta   e+2
 
           ldx   #0
-loop      anop
-          ROTL4MOVE temp,a_,5
-          stx   idx
-          cpx   #60*4
-          bge   f_60
-          cpx   #40*4
-          bge   f_40
-          cpx   #20*4
-          bge   f_20
-
-* f_0 to f_19
-f_0       lda   c
-          eor   d
-          and   b
-          eor   d
-          clc
-          adc   #$7999
-          sta   f_plus_k
-
-          lda   c+2
-          eor   d+2
-          and   b+2
-          eor   d+2
-          adc   #$5A82
-          sta   f_plus_k+2
-          bra   after_f
-
-* f_20 to f_39
-f_20      lda   b
-          eor   c
-          eor   d
-          clc
-          adc   #$EBA1
-          sta   f_plus_k
-
-          lda   b+2
-          eor   c+2
-          eor   d+2
-          adc   #$6ED9
-          sta   f_plus_k+2
-          bra   after_f
-
-* f_40 to f_59
-f_40      lda   c
-          ora   d
-          and   b
-          sta   f40temp
-          lda   c
-          and   d
-          ora   f40temp
-          clc
-          adc   #$BCDC
-          sta   f_plus_k
-
-          lda   c+2
-          ora   d+2
-          and   b+2
-          sta   f40temp
-          lda   c+2
-          and   d+2
-          ora   f40temp
-          adc   #$8F1B
-          sta   f_plus_k+2
-          bra   after_f
-
-* f_60 to f_79
-f_60      lda   b
-          eor   c
-          eor   d
-          clc
-          adc   #$C1D6
-          sta   f_plus_k
-
-          lda   b+2
-          eor   c+2
-          eor   d+2
-          adc   #$CA62
-          sta   f_plus_k+2
-
-after_f   anop
-          ldx   idx
-          clc
-          lda   w,x
-          adc   temp
-          tay
-          lda   w+2,x
-          adc   temp+2
-          tax
-          clc
-          tya
-          adc   e
-          tay
-          txa
-          adc   e+2
-          tax
-          clc
-          tya
-          adc   f_plus_k
-          tay
-          txa
-          adc   f_plus_k+2
-          tax
-
-          lda   d
-          sta   e
-          lda   d+2
-          sta   e+2
-
-          lda   c
-          sta   d
-          lda   c+2
-          sta   d+2
-
-          ROTL4MOVE c,b,30
-
-          lda   a_
-          sta   b
-          lda   a_+2
-          sta   b+2
-
-          sty   a_
-          stx   a_+2
-
-          ldx   idx
-          inx
-          inx
-          inx
-          inx
-          cpx   #80*4
-          bge   endloop
-          jmp   loop
+          BlockLoopPart 1          ;f_0 to f_19; X enters as 0 from the ldx above
+          BlockLoopPart 2          ;f_20 to f_39; X carries over from part 1
+          BlockLoopPart 3          ;f_40 to f_59
+          BlockLoopPart 4          ;f_60 to f_79; falls through to endloop
 
 endloop   clc
           lda   h0
diff --git a/sha1.macros b/sha1.macros
index 6fe9cb2..35f9759 100644
--- a/sha1.macros
+++ b/sha1.macros
@@ -1,3 +1,86 @@
+* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions (&n = 1..31; clobbers A and flags)
+          macro
+          ROTR4 &loc,&n
+          aif   &n>16,.dorotl
+          lda   &loc+2             ;A shadows the word at +2; its low bit seeds carry
+          lcla  &i
+&i        seta  &n
+.rotrloop
+          lsr   a                  ;to set carry
+          ror   &loc               ;carry enters the top of the low word
+          ror   &loc+2             ;bit shifted out of the low word wraps into the top
+&i        seta  &i-1
+          aif   &i>0,.rotrloop
+          ago   .end
+.dorotl
+          ROTL4 &loc,32-&n
+.end
+          mend
+
+* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions (&n = 1..31; clobbers A and flags)
+          macro
+          ROTL4 &loc,&n
+          aif   &n>16,.dorotr
+          lda   &loc               ;A shadows the low word; its top bit seeds carry
+          lcla  &i
+&i        seta  &n
+.rotlloop
+          asl   a                  ;to set carry
+          rol   &loc+2             ;carry enters the bottom of the word at +2
+          rol   &loc               ;bit shifted out of the word at +2 wraps into the bottom
+&i        seta  &i-1
+          aif   &i>0,.rotlloop
+          ago   .end
+.dorotr
+          ROTR4 &loc,32-&n
+.end
+          mend
+
+* &to := &from ROTR4 &n (&n = 1..31; only &to is written; clobbers A and flags)
+          macro
+          ROTR4MOVE &to,&from,&n
+          aif   &n>16,.dorotl
+          lda   &from
+          sta   &to
+          lda   &from+2            ;loaded last so A is left shadowing the word at +2
+          sta   &to+2
+          lcla  &i
+&i        seta  &n
+.rotrloop
+          lsr   a                  ;to set carry
+          ror   &to
+          ror   &to+2
+&i        seta  &i-1
+          aif   &i>0,.rotrloop
+          ago   .end
+.dorotl
+          ROTL4MOVE &to,&from,32-&n
+.end
+          mend
+
+* &to := &from ROTL4 &n (&n = 1..31; only &to is written; clobbers A and flags)
+          macro
+          ROTL4MOVE &to,&from,&n
+          aif   &n>16,.dorotr
+          lda   &from+2
+          sta   &to+2
+          lda   &from              ;loaded last so A is left shadowing the low word
+          sta   &to
+          lcla  &i
+&i        seta  &n
+.rotlloop
+          asl   a                  ;to set carry
+          rol   &to+2
+          rol   &to
+&i        seta  &i-1
+          aif   &i>0,.rotlloop
+          ago   .end
+.dorotr
+          ROTR4MOVE &to,&from,32-&n
+.end
+          mend
+
+
 * This makes a function wrapper that is callable from C,
 * taking a pointer to the context structure as its argument.
           macro
@@ -130,3 +213,140 @@
           aif   &i<80,.loop2
           mend
 
+
+* One part of the loop for processing blocks (&part is 1, 2, 3, or 4); loops until X reaches &part*20*4, stepping X by 4
+          macro
+          BlockLoopPart &part
+
+loop&part anop
+          stx   idx                ;keep the w offset; X is reused as scratch below
+          ROTL4MOVE temp,a_,5      ;temp = a_ rotated left by 5
+
+* f_0 to f_19: f_plus_k = (((c eor d) and b) eor d) + $5A827999
+          aif   &part<>1,.skip1
+          lda   c
+          eor   d
+          and   b
+          eor   d
+          clc
+          adc   #$7999
+          sta   f_plus_k
+
+          lda   c+2
+          eor   d+2
+          and   b+2
+          eor   d+2
+          adc   #$5A82             ;no clc: carry continues from the low-word add
+          sta   f_plus_k+2
+.skip1
+
+* f_20 to f_39: f_plus_k = (b eor c eor d) + $6ED9EBA1
+          aif   &part<>2,.skip2
+          lda   b
+          eor   c
+          eor   d
+          clc
+          adc   #$EBA1
+          sta   f_plus_k
+
+          lda   b+2
+          eor   c+2
+          eor   d+2
+          adc   #$6ED9             ;no clc: carry continues from the low-word add
+          sta   f_plus_k+2
+.skip2
+
+* f_40 to f_59: f_plus_k = (((c ora d) and b) ora (c and d)) + $8F1BBCDC
+          aif   &part<>3,.skip3
+          lda   c
+          ora   d
+          and   b
+          sta   f40temp            ;park (c ora d) and b while c and d is formed
+          lda   c
+          and   d
+          ora   f40temp
+          clc
+          adc   #$BCDC
+          sta   f_plus_k
+
+          lda   c+2
+          ora   d+2
+          and   b+2
+          sta   f40temp
+          lda   c+2
+          and   d+2
+          ora   f40temp
+          adc   #$8F1B             ;no clc: carry continues from the low-word add
+          sta   f_plus_k+2
+.skip3
+
+* f_60 to f_79: f_plus_k = (b eor c eor d) + $CA62C1D6
+          aif   &part<>4,.skip4
+          lda   b
+          eor   c
+          eor   d
+          clc
+          adc   #$C1D6
+          sta   f_plus_k
+
+          lda   b+2
+          eor   c+2
+          eor   d+2
+          adc   #$CA62             ;no clc: carry continues from the low-word add
+          sta   f_plus_k+2
+.skip4
+
+          ldx   idx                ;reload the w offset
+          clc
+          lda   w,x
+          adc   temp
+          tay                      ;Y = low word of the running sum
+          lda   w+2,x
+          adc   temp+2
+          tax                      ;X = high word of the running sum
+          clc
+          tya
+          adc   e
+          tay
+          txa
+          adc   e+2
+          tax                      ;running sum now includes e
+          clc
+          tya
+          adc   f_plus_k
+          tay
+          txa
+          adc   f_plus_k+2
+          tax                      ;X:Y = w entry + temp + e + f_plus_k
+
+          lda   d                  ;e = d
+          sta   e
+          lda   d+2
+          sta   e+2
+
+          lda   c                  ;d = c
+          sta   d
+          lda   c+2
+          sta   d+2
+
+          ROTL4MOVE c,b,30         ;c = b rotated left by 30
+
+          lda   a_                 ;b = a_
+          sta   b
+          lda   a_+2
+          sta   b+2
+
+          sty   a_                 ;a_ = the sum built in X:Y
+          stx   a_+2
+
+          ldx   idx
+          inx                      ;advance the w offset by 4 bytes
+          inx
+          inx
+          inx
+          cpx   #&part*20*4        ;end offset for this part
+          bge   endloop&part
+          jmp   loop&part          ;back for the next round
+endloop&part anop
+          mend
+