a2audit/shasum/shasum.a
Zellyn Hunter 54710c10a0 shasum: Longer tests, slightly shorter code
Added tests for 0x1ff- and 0x200-length inputs. Also changed code that
sets k constants to use constant values, rather than copying, to save
a few bytes.
2016-10-10 20:54:50 -04:00

618 lines
7.0 KiB
Plaintext

!to "shasum.o", plain
* = $6000
jmp main
;; clear addresses:
;; (http://apple2.org.za/gswv/a2zine/faqs/csa2pfaq.html#017)
;; 06-09
;; EB-EF
;; FA-FD
!addr SRC = $06
!addr DST = $08
!addr INPUT = $eb
!addr LENGTH = $ee
!addr TMP1 = $fa
!addr TMP2 = $fb
!addr PRBYTE = $FDDA
!addr COUT = $FDED
h0: !32 0 ; return value (hash)
h1: !32 0
h2: !32 0
h3: !32 0
h4: !32 0
h5:
ml: !32 0, 0 ; message length
w: !fill 64, 0
w_next: !fill 64, 0
a: !32 0
b: !32 0
c: !32 0
d: !32 0
e: !32 0
f: !32 0
temp: !32 0
k: !32 0
kh0: !be32 $67452301 ; initial values for h0..h4
kh1: !be32 $EFCDAB89
kh2: !be32 $98BADCFE
kh3: !be32 $10325476
kh4: !be32 $C3D2E1F0
k1 = $5A827999 ; k constants
k2 = $6ED9EBA1
k3 = $8F1BBCDC
k4 = $CA62C1D6
!macro set32 .target, .value {
lda #<(.value >> 24)
sta .target
lda #<(.value >> 16)
sta .target+1
lda #<(.value >> 8)
sta .target+2
lda #<(.value)
sta .target+3
}
!macro setSRC .source {
lda #<.source
sta SRC
lda #>.source
sta SRC+1
}
!macro setDST .dest {
lda #<.dest
sta DST
lda #>.dest
sta DST+1
}
;;; Print a string of bytes, as hex.
;;; Address in SRC, count in A.
;;; Burns A,Y.
prbytes:
ldy #0
- pha
lda (SRC),y
jsr PRBYTE
iny
pla
adc #$ff
bne -
rts
main:
;; Test shasum ""
lda #0
sta INPUT
lda #$fe
sta INPUT+1
lda #0
sta LENGTH+1
lda #0 ; da39a3ee5e6b4b0d3255bfef95601890afd80709
sta LENGTH
jsr shasum
; lda #$8d
; jsr COUT
+setSRC h0
lda #(h5-h0)
jsr prbytes
;; Test shasum FE00[:0x37]
lda #0
sta INPUT
lda #$fe
sta INPUT+1
lda #0
sta LENGTH+1
lda #$37 ; 1CF73FC6156B548A949D315120B5256245EAA33E
sta LENGTH
jsr shasum
; lda #$8d
; jsr COUT
+setSRC h0
lda #(h5-h0)
jsr prbytes
;; Test shasum FE00[:0x100]
lda #0
sta INPUT
lda #$fe
sta INPUT+1
lda #1
sta LENGTH+1
lda #0 ; 7B3D05347B52210065E27054FDFD0B8B699F0965
sta LENGTH
jsr shasum
; lda #$8d
; jsr COUT
+setSRC h0
lda #(h5-h0)
jsr prbytes
;; Test shasum FE00[:0x1ff]
lda #0
sta INPUT
lda #$fe
sta INPUT+1
lda #$1
sta LENGTH+1
lda #$ff ; 269CA6B0C644DAC01D908B20C10C0D5B19C52ABF
sta LENGTH
jsr shasum
; lda #$8d
; jsr COUT
+setSRC h0
lda #(h5-h0)
jsr prbytes
;; Test shasum FE00[:0x200]
lda #0
sta INPUT
lda #$fe
sta INPUT+1
lda #2
sta LENGTH+1
lda #0 ; D5AC71D5EE76E31CC82CF5136151BF4CDA503601
sta LENGTH
jsr shasum
; lda #$8d
; jsr COUT
+setSRC h0
lda #(h5-h0)
jsr prbytes
rts
shasum:
;; Initialize h0..h4
ldy #(h5-h0-1)
- lda kh0,y
sta h0,y
dey
bpl -
;; Initialize message length (ml)
lda #0
ldy #4
- sta ml, y
dey
bpl -
lda LENGTH
sta ml+7
lda LENGTH+1
sta ml+6
;; Message length is in bits
ldy #3
- asl ml+7
rol ml+6
rol ml+5
dey
bne -
;; Initialize chunk counter
;; ldy #0 ; already zero
;; Invert length so we can inc instead of dec
lda LENGTH
sec
lda #0
sbc LENGTH
sta LENGTH
lda #0
sbc LENGTH+1
sta LENGTH+1
ora LENGTH
beq .msgdone
.loop lda (INPUT),y
sta w,y
iny
cpy #$40
bne +
;; Call do_chunk
jsr do_chunk
ldy #0
clc
lda INPUT
adc #$40
sta INPUT
bcc +
inc INPUT+1
+ inc LENGTH
bne .loop
inc LENGTH+1
bne .loop
.msgdone:
lda #$80
sta w,y
iny
cpy #$40
bne .zeros
jsr do_chunk
ldy #0
.zeros
cpy #$38
beq .length
lda #0
sta w,y
iny
cpy #$40
bne .zeros
jsr do_chunk
ldy #0
jmp .zeros
.length
ldy #7
- lda ml,y
sta w+$38,y
dey
bpl -
jsr do_chunk
rts
;;; do_chunk processes a chunk of input. It burns A,X,Y,TMP1,TMP2.
do_chunk:
;; Copy a..e from h0..h4
ldy #(f-a-1)
- lda h0,y
sta a,y
dey
bpl -
ldy #0 ; y is index into w
;; First 20: k1
+set32 k, k1
ldx #16
- jsr kind1
dex
bne -
jsr fill
ldx #4
- jsr kind1
dex
bne -
;; Second 20: k2
+set32 k, k2
ldx #12
- jsr kind2
dex
bne -
jsr fill
ldx #8
- jsr kind2
dex
bne -
;; Third 20: k3
+set32 k, k3
ldx #8
- jsr kind3
dex
bne -
jsr fill
ldx #12
- jsr kind3
dex
bne -
;; Fourth 20: k4
+set32 k, k4
ldx #4
- jsr kind2
dex
bne -
jsr fill
ldx #16
- jsr kind2
dex
bne -
+setSRC a
+setDST h0
ldx #5
- jsr add32
clc
lda SRC
adc #4
sta SRC
lda DST
adc #4
sta DST
dex
bne -
rts
kind1:
sty TMP1
stx TMP2
;; f = d xor (b and (c xor d))
+setDST f
+setSRC d
jsr cp32
+setSRC c
jsr xor32
+setSRC b
jsr and32
+setSRC d
jsr xor32
jmp common
kind2:
sty TMP1
stx TMP2
;; f = b xor c xor d
+setDST f
+setSRC d
jsr cp32
+setSRC c
jsr xor32
+setSRC b
jsr xor32
jmp common
kind3:
sty TMP1
stx TMP2
;; f = (b and c) or (d and (b or c))
+setSRC c
+setDST f
jsr cp32
+setDST temp
jsr cp32
+setSRC b
jsr and32
+setDST f
jsr or32
+setSRC d
jsr and32
+setSRC temp
jsr or32
; jmp common
common:
;; temp = (a leftrotate 5) + f + e + k + w[i]
+setDST temp
+setSRC a
jsr cp32
jsr rol8
jsr ror1
jsr ror1
jsr ror1
+setSRC f
jsr add32
+setSRC e
jsr add32
+setSRC k
jsr add32
;; !setSRC w[i], and call add32
ldy TMP1
clc
tya
adc #<w
sta SRC
lda #0
adc #>w
sta SRC+1
jsr add32
;; e = d
+setSRC d
+setDST e
jsr cp32
;; d = c
+setSRC c
+setDST d
jsr cp32
;; c = b leftrotate 30
+setSRC b
+setDST c
jsr cp32
jsr ror1
jsr ror1
;; b = a
+setSRC a
+setDST b
jsr cp32
;; a = temp
+setSRC temp
+setDST a
jsr cp32
ldy TMP1
ldx TMP2
iny
iny
iny
iny
rts
;; Replace w[i:i+16] with w[i+16:i+32]. Burns a. Sets y=0.
fill:
+setDST w_next
+setSRC w
ldx #0x10
- sec
lda DST
sbc #16*4
sta SRC
jsr cp32 ; w[i] = w[i-16]
clc
lda SRC
adc #2*4
sta SRC
jsr xor32 ; ^ w[i-14]
lda SRC
adc #6*4
sta SRC
jsr xor32 ; ^ w[i-8]
lda SRC
adc #5*4
sta SRC
jsr xor32 ; ^ w[i-3]
jsr rol1
clc
lda DST
adc #4 ; i++
sta DST
dex
bne -
ldx #w_next-w-1
- lda w_next,x
sta w,x
dex
bpl -
ldy #0
rts
;;; 32-bit, big-endian math routines.
;;; Result goes in DST. Second operand (if any)
;;; comes from SRC.
;; Rotate-left DST. Burns a,y.
rol1:
ldy #0
lda (DST),y
rol
ldy #3
- lda (DST),y
rol
sta (DST),y
dey
bpl -
rts
;; Rotate-right DST. Burns a,y.
ror1:
ldy #3
lda (DST),y
ror
ldy #0
php
- lda (DST),y
plp
ror
php
sta (DST),y
iny
cpy #4
bne -
plp
rts
;; Xor SRC into DST. Burns a,y.
xor32:
ldy #3
- lda (SRC),y
eor (DST),y
sta (DST),y
dey
bpl -
rts
;; Copy DST to SRC. Burns a,y.
cp32:
ldy #3
- lda (SRC),y
sta (DST),y
dey
bpl -
rts
add32:
clc
ldy #3
- lda (SRC),y
adc (DST),y
sta (DST),y
dey
bpl -
rts
and32:
clc
ldy #3
- lda (SRC),y
and (DST),y
sta (DST),y
dey
bpl -
rts
or32:
clc
ldy #3
- lda (SRC),y
ora (DST),y
sta (DST),y
dey
bpl -
rts
;; Rotate DST right by 8 bits. Burns a,x,y.
rol8:
ldy #0
lda (DST),y
tax
- iny
lda (DST),y
dey
sta (DST),y
iny
cpy #3
bne -
txa
sta (DST),y
rts
!eof
TODOs
[X] Routine to print n hex bytes (address, length (byte))
[X] Routine to get the next 16 values (64 bytes) of input
[X] Routine to get w[i] one i at a time, and rebuild next 16 values
Needed arithmetic routines for sha1sum:
- [X] add32
- [X] and32
- [X] or32
- [X] xor32
- [X] ROL1
- [X] ROR1
- [X] ROL5 --> ROL8, (ROR1,ROR1,ROR1)
- [X] ROL30 --> (ROR1,ROR1)