Use separate loops for the four parts of the SHA-1 computation.

2024-06-16 03:29:26 +00:00 · 2017-06-29 20:51:36 -05:00 · 2017-06-29 20:51:36 -05:00 · 7182fc5817
commit 7182fc5817
parent dfdb3186fd
2 changed files with 224 additions and 132 deletions
--- a/sha1.asm
+++ b/sha1.asm
@ -1,6 +1,5 @@
 	case	on
 	mcopy	sha1.macros
 	mcopy	rotate.macros
 * Direct page locations	
 ;chunk	gequ	0	; 8 bytes
@ -92,137 +91,10 @@ SHA1_PROCESSCHUNK start
 	sta	e+2
 	ldx	#0
-loop	anop
+	BlockLoopPart 1
-	ROTL4MOVE temp,a_,5
+	BlockLoopPart 2
-	stx	idx
+	BlockLoopPart 3
-	cpx	#60*4
+	BlockLoopPart 4
 	bge	f_60
 	cpx	#40*4
 	bge	f_40
 	cpx	#20*4
 	bge	f_20
 * f_0 to f_19
 f_0	lda	c
 	eor	d
 	and	b
 	eor	d
 	clc
 	adc	#$7999
 	sta	f_plus_k
 	lda	c+2
 	eor	d+2
 	and	b+2
 	eor	d+2
 	adc	#$5A82
 	sta	f_plus_k+2
 	bra	after_f
 * f_20 to f_39
 f_20	lda	b
 	eor	c
 	eor	d
 	clc
 	adc	#$EBA1
 	sta	f_plus_k
 	lda	b+2
 	eor	c+2
 	eor	d+2
 	adc	#$6ED9
 	sta	f_plus_k+2
 	bra	after_f
 * f_40 to f_59
 f_40	lda	c
 	ora	d
 	and	b
 	sta	f40temp
 	lda	c
 	and	d
 	ora	f40temp
 	clc
 	adc	#$BCDC
 	sta	f_plus_k
 	lda	c+2
 	ora	d+2
 	and	b+2
 	sta	f40temp
 	lda	c+2
 	and	d+2
 	ora	f40temp
 	adc	#$8F1B
 	sta	f_plus_k+2
 	bra	after_f
 * f_60 to f_79
 f_60	lda	b
 	eor	c
 	eor	d
 	clc
 	adc	#$C1D6
 	sta	f_plus_k
 	lda	b+2
 	eor	c+2
 	eor	d+2
 	adc	#$CA62
 	sta	f_plus_k+2
 after_f	anop
 	ldx	idx
 	clc
 	lda	w,x
 	adc	temp
 	tay
 	lda	w+2,x
 	adc	temp+2
 	tax
 	clc
 	tya
 	adc	e
 	tay
 	txa
 	adc	e+2
 	tax
 	clc
 	tya
 	adc	f_plus_k
 	tay
 	txa
 	adc	f_plus_k+2
 	tax
 	lda	d
 	sta	e
 	lda	d+2
 	sta	e+2
 	lda	c
 	sta	d
 	lda	c+2
 	sta	d+2
 	ROTL4MOVE c,b,30
 	lda	a_
 	sta	b
 	lda	a_+2
 	sta	b+2
 	sty	a_
 	stx	a_+2
 	ldx	idx
 	inx
 	inx
 	inx
 	inx
 	cpx	#80*4
 	bge	endloop
 	jmp	loop
 endloop clc
 	lda	h0
--- a/sha1.macros
+++ b/sha1.macros
@ -1,3 +1,86 @@
 * Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
 *
 * The value is handled as two 16-bit words: low word at &loc, high word
 * at &loc+2.  &n must be an assembly-time constant.  Counts above 16 are
 * delegated to ROTL4 with a count of 32-&n so that at most 16 rotate
 * steps are generated.  A count below 1 generates no code at all (the
 * previous do-while shaped loop emitted one rotate step for a count of 0).
 * Clobbers A and the processor flags.
 * NOTE(review): assumes 16-bit accumulator/memory mode - confirm at call sites.
 	macro
 	ROTR4	&loc,&n
 	aif	&n>16,.dorotl
 	aif	&n<1,.end
 	lda	&loc+2
 	lcla	&i
 &i	seta	&n
 .rotrloop
 	lsr	a		;carry := bit leaving the high word
 	ror	&loc		;low word takes it in, gives up its bit 0
 	ror	&loc+2		;which wraps round into the high word
 &i	seta	&i-1
 	aif	&i>0,.rotrloop
 	ago	.end
 .dorotl
 	ROTL4	&loc,32-&n
 .end
 	mend
 * Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions
 *
 * The value is handled as two 16-bit words: low word at &loc, high word
 * at &loc+2.  &n must be an assembly-time constant.  Counts above 16 are
 * delegated to ROTR4 with a count of 32-&n so that at most 16 rotate
 * steps are generated.  A count below 1 generates no code at all (the
 * previous do-while shaped loop emitted one rotate step for a count of 0).
 * Clobbers A and the processor flags.
 * NOTE(review): assumes 16-bit accumulator/memory mode - confirm at call sites.
 	macro
 	ROTL4	&loc,&n
 	aif	&n>16,.dorotr
 	aif	&n<1,.end
 	lda	&loc
 	lcla	&i
 &i	seta	&n
 .rotlloop
 	asl	a		;carry := bit leaving the low word
 	rol	&loc+2		;high word takes it in, gives up its bit 15
 	rol	&loc		;which wraps round into the low word
 &i	seta	&i-1
 	aif	&i>0,.rotlloop
 	ago	.end
 .dorotr
 	ROTR4	&loc,32-&n
 .end
 	mend
 * &to := &from ROTR4 &n
 *
 * Copies the 32-bit value at &from (low word at &from, high word at
 * &from+2) to &to and right-rotates the copy by &n positions; &from is
 * only read.  &n must be an assembly-time constant.  Counts above 16 are
 * delegated to ROTL4MOVE with a count of 32-&n.  A count below 1 produces
 * a plain copy (the previous do-while shaped loop rotated once for 0).
 * Clobbers A and the processor flags.
 * NOTE(review): assumes 16-bit accumulator/memory mode - confirm at call sites.
 	macro
 	ROTR4MOVE &to,&from,&n
 	aif	&n>16,.dorotl
 	lda	&from
 	sta	&to
 	lda	&from+2		;high word stays in A to feed the carry
 	sta	&to+2
 	aif	&n<1,.end
 	lcla	&i
 &i	seta	&n
 .rotrloop
 	lsr	a		;carry := bit leaving the high word
 	ror	&to		;low word takes it in, gives up its bit 0
 	ror	&to+2		;which wraps round into the high word
 &i	seta	&i-1
 	aif	&i>0,.rotrloop
 	ago	.end
 .dorotl
 	ROTL4MOVE &to,&from,32-&n
 .end
 	mend
 * &to := &from ROTL4 &n
 *
 * Copies the 32-bit value at &from (low word at &from, high word at
 * &from+2) to &to and left-rotates the copy by &n positions; &from is
 * only read.  &n must be an assembly-time constant.  Counts above 16 are
 * delegated to ROTR4MOVE with a count of 32-&n.  A count below 1 produces
 * a plain copy (the previous do-while shaped loop rotated once for 0).
 * Clobbers A and the processor flags.
 * NOTE(review): assumes 16-bit accumulator/memory mode - confirm at call sites.
 	macro
 	ROTL4MOVE &to,&from,&n
 	aif	&n>16,.dorotr
 	lda	&from+2
 	sta	&to+2
 	lda	&from		;low word stays in A to feed the carry
 	sta	&to
 	aif	&n<1,.end
 	lcla	&i
 &i	seta	&n
 .rotlloop
 	asl	a		;carry := bit leaving the low word
 	rol	&to+2		;high word takes it in, gives up its bit 15
 	rol	&to		;which wraps round into the low word
 &i	seta	&i-1
 	aif	&i>0,.rotlloop
 	ago	.end
 .dorotr
 	ROTR4MOVE &to,&from,32-&n
 .end
 	mend
 * This makes a function wrapper that is callable from C,
 * taking a pointer to the context structure as its argument.
 	macro
@ -130,3 +213,140 @@
 	aif	&i<80,.loop2
 	mend
 * One part of the loop for processing blocks (&part is 1, 2, 3, or 4)
 *
 * Expands to a loop labelled loop<part> ... endloop<part>.  Each pass
 * handles the 32-bit word at w+X and then advances X by 4; the loop is
 * left once X >= part*20*4, so the starting X comes from the caller.
 * Every 32-bit quantity is kept as two 16-bit words, low word first.
 * One pass computes
 *    sum = (a_ rotated left 5) + w[X] + e + f_plus_k
 * where f_plus_k is a part-specific function of b, c and d plus a
 * part-specific 32-bit constant, then moves the working values along:
 *    e := d, d := c, c := b rotated left 30, b := a_, a_ := sum.
 * Uses A, X and Y; writes idx, temp, f_plus_k and (part 3 only) f40temp
 * in addition to a_, b, c, d and e.
 * NOTE(review): the 16-bit immediates imply 16-bit A/X/Y - confirm the
 * register-size mode in effect where the macro is invoked.
 	macro
 	BlockLoopPart &part
 loop&part anop
 	stx	idx		; keep the word offset; X is reused as scratch below
 * temp := a_ rotated left by 5
 	ROTL4MOVE temp,a_,5
 * f_0 to f_19
 * f_plus_k := (((c eor d) and b) eor d) + $5A827999
 	aif	&part<>1,.skip1
 	lda	c
 	eor	d
 	and	b
 	eor	d
 	clc
 	adc	#$7999		; low word of the constant
 	sta	f_plus_k
 	lda	c+2
 	eor	d+2
 	and	b+2
 	eor	d+2
 	adc	#$5A82		; high word; carry comes from the low-word add
 	sta	f_plus_k+2
 .skip1
 * f_20 to f_39
 * f_plus_k := (b eor c eor d) + $6ED9EBA1
 	aif	&part<>2,.skip2
 	lda	b
 	eor	c
 	eor	d
 	clc
 	adc	#$EBA1		; low word of the constant
 	sta	f_plus_k
 	lda	b+2
 	eor	c+2
 	eor	d+2
 	adc	#$6ED9		; high word; carry comes from the low-word add
 	sta	f_plus_k+2
 .skip2
 * f_40 to f_59
 * f_plus_k := (((c ora d) and b) ora (c and d)) + $8F1BBCDC
 	aif	&part<>3,.skip3
 	lda	c
 	ora	d
 	and	b
 	sta	f40temp		; park (c ora d) and b while c and d is formed
 	lda	c
 	and	d
 	ora	f40temp
 	clc
 	adc	#$BCDC		; low word of the constant
 	sta	f_plus_k
 	lda	c+2
 	ora	d+2
 	and	b+2
 	sta	f40temp		; sta leaves the carry from the low-word add intact
 	lda	c+2
 	and	d+2
 	ora	f40temp
 	adc	#$8F1B		; high word; carry comes from the low-word add
 	sta	f_plus_k+2
 .skip3
 * f_60 to f_79
 * f_plus_k := (b eor c eor d) + $CA62C1D6
 	aif	&part<>4,.skip4
 	lda	b
 	eor	c
 	eor	d
 	clc
 	adc	#$C1D6		; low word of the constant
 	sta	f_plus_k
 	lda	b+2
 	eor	c+2
 	eor	d+2
 	adc	#$CA62		; high word; carry comes from the low-word add
 	sta	f_plus_k+2
 .skip4
 * Running 32-bit sum is kept in Y (low word) and X (high word).
 * sum := w[X] + temp
 	ldx	idx
 	clc
 	lda	w,x
 	adc	temp
 	tay
 	lda	w+2,x
 	adc	temp+2
 	tax			; X no longer holds the offset from here on
 * sum := sum + e
 	clc
 	tya
 	adc	e
 	tay
 	txa
 	adc	e+2
 	tax
 * sum := sum + f_plus_k
 	clc
 	tya
 	adc	f_plus_k
 	tay
 	txa
 	adc	f_plus_k+2
 	tax
 * e := d
 	lda	d
 	sta	e
 	lda	d+2
 	sta	e+2
 * d := c
 	lda	c
 	sta	d
 	lda	c+2
 	sta	d+2
 * c := b rotated left by 30 (uses A only, so the sum in Y and X survives)
 	ROTL4MOVE c,b,30
 * b := a_
 	lda	a_
 	sta	b
 	lda	a_+2
 	sta	b+2
 * a_ := sum
 	sty	a_
 	stx	a_+2
 * Reload the saved offset and step to the next 32-bit word of w.
 	ldx	idx
 	inx
 	inx
 	inx
 	inx
 	cpx	#&part*20*4	; first offset that is beyond this part
 	bge	endloop&part	; unsigned X >= limit: this part is finished
 	jmp	loop&part	; presumably jmp because the body exceeds branch range
 endloop&part anop
 	mend