Compute SHA-1 message schedule 20 elements at a time, reusing storage.

This reduces the storage needed and boosts performance, since the whole schedule is contained in the direct page.
2025-02-19 17:30:33 +00:00 · 2017-06-29 22:22:06 -05:00 · 2017-06-29 22:22:06 -05:00 · aab47e38e7
commit aab47e38e7
parent 7182fc5817
2 changed files with 47 additions and 49 deletions
--- a/sha1.asm
+++ b/sha1.asm
@ -62,9 +62,6 @@ sha1_processchunk start
 	end

 SHA1_PROCESSCHUNK start
-
-	ComputeSchedule
-	
 	lda	h0
 	sta	a_
 	lda	h0+2
@ -90,10 +87,17 @@ SHA1_PROCESSCHUNK start
 	lda	h4+2
 	sta	e+2

+	ComputeSchedule 1
 	ldx	#0
 	BlockLoopPart 1
+	ComputeSchedule 2
+	ldx	#0
 	BlockLoopPart 2
+	ComputeSchedule 3
+	ldx	#0
 	BlockLoopPart 3
+	ComputeSchedule 4
+	ldx	#0
 	BlockLoopPart 4

 endloop clc
--- a/sha1.macros
+++ b/sha1.macros
@ -105,81 +105,72 @@ dorotl
 * Macros to operate on elements of the message schedule (W)
 	macro
 &lab	lda_w	&i,&inc
+	lcla	&j
+&j	seta	&i
+.modloop1
+	aif	&j<20,.goodidx1
+&j	seta	&j-20
+	ago	.modloop1
+.goodidx1
 	aif	C:&inc<>0,.haveinc
 	lcla	&inc
 .haveinc
-	aif	w+(&i)*4+&inc>255,.bigidx
-&lab	lda	w+(&i)*4+&inc
-	ago	.end
-.bigidx
-&lab	ldx	#((&i)-16)*4+&inc
-	lda	w+16*4,x
-.end
+&lab	lda	w+(&j)*4+&inc
 	mend

 	macro
 &lab	eor_w	&i,&inc
+	lcla	&j
+&j	seta	&i
+.modloop2
+	aif	&j<20,.goodidx2
+&j	seta	&j-20
+	ago	.modloop2
+.goodidx2
 	aif	C:&inc<>0,.haveinc
 	lcla	&inc
 .haveinc
-	aif	w+(&i)*4+&inc>255,.bigidx
-&lab	eor	w+(&i)*4+&inc
-	ago	.end
-.bigidx
-&lab	ldx	#((&i)-16)*4+&inc
-	eor	w+16*4,x
-.end
+&lab	eor	w+(&j)*4+&inc
 	mend

 	macro
 &lab	sta_w	&i,&inc
+	lcla	&j
+&j	seta	&i
+.modloop3
+	aif	&j<20,.goodidx3
+&j	seta	&j-20
+	ago	.modloop3
+.goodidx3
 	aif	C:&inc<>0,.haveinc
 	lcla	&inc
 .haveinc
-	aif	w+(&i)*4+&inc>255,.bigidx
-&lab	sta	w+(&i)*4+&inc
-	ago	.end
-.bigidx
-&lab	ldx	#((&i)-16)*4+&inc
-	sta	w+16*4,x
-.end
-	mend
-
-	macro
-&lab	inc_w	&i,&inc
-	aif	C:&inc<>0,.haveinc
-	lcla	&inc
-.haveinc
-	aif	w+(&i)*4+&inc>255,.bigidx
-&lab	inc	w+(&i)*4+&inc
-	ago	.end
-.bigidx
-&lab	ldx	#((&i)-16)*4+&inc
-	inc	w+16*4,x
-.end
+&lab	sta	w+(&j)*4+&inc
 	mend

 	macro
 &lab	rol_w	&i,&inc
+	lcla	&j
+&j	seta	&i
+.modloop4
+	aif	&j<20,.goodidx4
+&j	seta	&j-20
+	ago	.modloop4
+.goodidx4
 	aif	C:&inc<>0,.haveinc
 	lcla	&inc
 .haveinc
-	aif	w+(&i)*4+&inc>255,.bigidx
-&lab	rol	w+(&i)*4+&inc
-	ago	.end
-.bigidx
-&lab	ldx	#((&i)-16)*4+&inc
-	rol	w+16*4,x
-.end
+&lab	rol	w+(&j)*4+&inc
 	mend


-* Compute the message schedule (W_0 to W_79) 
+* Compute one part of the message schedule (20 elements)
 	macro
-	ComputeSchedule
+	ComputeSchedule &part
 	lcla	&i

 ; Flip the endianness of W_0 to W_15 (the current chunk of the message)
+	aif	&part<>1,.skippart1
 .loop1
 	lda	w+&i*4
 	xba
@ -190,8 +181,11 @@ dorotl
 	sta	w+&i*4
 &i	seta	&i+1
 	aif	&i<16,.loop1
+.skippart1

 ; compute the rest of the message schedule (W_16 to W_79)
+	aif	&part=1,.loop2
+&i	seta	(&part-1)*20
 .loop2
 	lda_w	&i-3
 	eor_w	&i-8
@ -210,7 +204,7 @@ dorotl
 	rol_w	&i
 	
 &i	seta	&i+1
-	aif	&i<80,.loop2
+	aif	&i<&part*20,.loop2
 	mend


@ -344,7 +338,7 @@ loop&part anop
 	inx
 	inx
 	inx
-	cpx	#&part*20*4
+	cpx	#20*4
 	bge	endloop&part
 	jmp	loop&part
 endloop&part anop