From aab47e38e7fa04855b5f975d634afecf18bdaad2 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Thu, 29 Jun 2017 22:22:06 -0500 Subject: [PATCH] Compute SHA-1 message schedule 20 elements at a time, reusing storage. This reduces the storage needed and boosts performance, since the whole schedule is contained in the direct page. --- sha1.asm | 10 +++++-- sha1.macros | 86 +++++++++++++++++++++++++---------------------------- 2 files changed, 47 insertions(+), 49 deletions(-) diff --git a/sha1.asm b/sha1.asm index ab09892..1ddd857 100644 --- a/sha1.asm +++ b/sha1.asm @@ -62,9 +62,6 @@ sha1_processchunk start end SHA1_PROCESSCHUNK start - - ComputeSchedule - lda h0 sta a_ lda h0+2 @@ -90,10 +87,17 @@ SHA1_PROCESSCHUNK start lda h4+2 sta e+2 + ComputeSchedule 1 ldx #0 BlockLoopPart 1 + ComputeSchedule 2 + ldx #0 BlockLoopPart 2 + ComputeSchedule 3 + ldx #0 BlockLoopPart 3 + ComputeSchedule 4 + ldx #0 BlockLoopPart 4 endloop clc diff --git a/sha1.macros b/sha1.macros index 35f9759..daf4ad8 100644 --- a/sha1.macros +++ b/sha1.macros @@ -105,81 +105,72 @@ dorotl * Macros to operate on elements of the message schedule (W) macro &lab lda_w &i,&inc + lcla &j +&j seta &i +.modloop1 + aif &j<20,.goodidx1 +&j seta &j-20 + ago .modloop1 +.goodidx1 aif C:&inc<>0,.haveinc lcla &inc .haveinc - aif w+(&i)*4+&inc>255,.bigidx -&lab lda w+(&i)*4+&inc - ago .end -.bigidx -&lab ldx #((&i)-16)*4+&inc - lda w+16*4,x -.end +&lab lda w+(&j)*4+&inc mend macro &lab eor_w &i,&inc + lcla &j +&j seta &i +.modloop2 + aif &j<20,.goodidx2 +&j seta &j-20 + ago .modloop2 +.goodidx2 aif C:&inc<>0,.haveinc lcla &inc .haveinc - aif w+(&i)*4+&inc>255,.bigidx -&lab eor w+(&i)*4+&inc - ago .end -.bigidx -&lab ldx #((&i)-16)*4+&inc - eor w+16*4,x -.end +&lab eor w+(&j)*4+&inc mend macro &lab sta_w &i,&inc + lcla &j +&j seta &i +.modloop3 + aif &j<20,.goodidx3 +&j seta &j-20 + ago .modloop3 +.goodidx3 aif C:&inc<>0,.haveinc lcla &inc .haveinc - aif w+(&i)*4+&inc>255,.bigidx -&lab sta w+(&i)*4+&inc - ago .end 
-.bigidx -&lab ldx #((&i)-16)*4+&inc - sta w+16*4,x -.end - mend - - macro -&lab inc_w &i,&inc - aif C:&inc<>0,.haveinc - lcla &inc -.haveinc - aif w+(&i)*4+&inc>255,.bigidx -&lab inc w+(&i)*4+&inc - ago .end -.bigidx -&lab ldx #((&i)-16)*4+&inc - inc w+16*4,x -.end +&lab sta w+(&j)*4+&inc mend macro &lab rol_w &i,&inc + lcla &j +&j seta &i +.modloop4 + aif &j<20,.goodidx4 +&j seta &j-20 + ago .modloop4 +.goodidx4 aif C:&inc<>0,.haveinc lcla &inc .haveinc - aif w+(&i)*4+&inc>255,.bigidx -&lab rol w+(&i)*4+&inc - ago .end -.bigidx -&lab ldx #((&i)-16)*4+&inc - rol w+16*4,x -.end +&lab rol w+(&j)*4+&inc mend -* Compute the message schedule (W_0 to W_79) +* Compute one part of the message schedule (20 elements) macro - ComputeSchedule + ComputeSchedule &part lcla &i ; Flip the endianness of W_0 to W_15 (the current chunk of the message) + aif &part<>1,.skippart1 .loop1 lda w+&i*4 xba @@ -190,8 +181,11 @@ dorotl sta w+&i*4 &i seta &i+1 aif &i<16,.loop1 +.skippart1 ; compute the rest of the message schedule (W_16 to W_79) + aif &part=1,.loop2 +&i seta (&part-1)*20 .loop2 lda_w &i-3 eor_w &i-8 @@ -210,7 +204,7 @@ dorotl rol_w &i &i seta &i+1 - aif &i<80,.loop2 + aif &i<&part*20,.loop2 mend @@ -344,7 +338,7 @@ loop&part anop inx inx inx - cpx #&part*20*4 + cpx #20*4 bge endloop&part jmp loop&part endloop&part anop