!to "shasum.o", plain
* = $6000
	jmp main		; step over the data area that follows

;; Zero-page locations this program uses. The ranges below are the ones
;; listed as clear by:
;; (http://apple2.org.za/gswv/a2zine/faqs/csa2pfaq.html#017)
;;   06-09
;;   EB-EF
;;   FA-FD
!addr SRC = $06		; 2-byte pointer: second operand of the 32-bit routines
!addr DST = $08		; 2-byte pointer: first operand and result of the 32-bit routines
!addr INPUT = $eb	; 2-byte pointer: message to hash (argument to shasum)
!addr LENGTH = $ee	; 2-byte message length in bytes (argument to shasum)
!addr TMP1 = $fa	; saved Y while a round is computed
!addr TMP2 = $fb	; saved X while a round is computed
!addr PRBYTE = $FDDA	; Apple II Monitor: print A as two hex digits
!addr COUT = $FDED	; Apple II Monitor: print the character in A

;; Working storage. All 32-bit quantities are handled most significant
;; byte first by the math routines.
;; The order of these definitions matters:
;;   - h0..h4 are copied and printed as one 20-byte run ending at h5;
;;   - w_next must directly follow w (fill reaches from one into the other);
;;   - a..e are copied as one run whose end is marked by f.
h0:	!32 0 ; return value (hash)
h1:	!32 0
h2:	!32 0
h3:	!32 0
h4:	!32 0
h5:			; end marker for the hash: h5-h0 is its size in bytes
ml:	!32 0, 0 ; message length (8 bytes; shasum stores it here in bits)
w:	!fill 64, 0	; current 16 words of the message schedule
w_next:	!fill 64, 0	; next 16 words, built by fill
a:	!32 0		; working variables of the round function
b:	!32 0
c:	!32 0
d:	!32 0
e:	!32 0
f:	!32 0		; result of the per-round boolean function
temp:	!32 0		; scratch word used while computing a round
k:	!32 0		; round constant currently in effect
kh0:	!be32 $67452301 ; initial values for h0..h4
kh1:	!be32 $EFCDAB89
kh2:	!be32 $98BADCFE
kh3:	!be32 $10325476
kh4:	!be32 $C3D2E1F0
k1 = $5A827999 ; k constants (assembly-time values, loaded into k with set32)
k2 = $6ED9EBA1
k3 = $8F1BBCDC
k4 = $CA62C1D6

;;; set32: store the 32-bit constant .value at .target, most significant
;;; byte first. Burns A.
!macro set32 .target, .value {
	lda #<(.value >> 24)
	sta .target
	lda #<(.value >> 16)
	sta .target+1
	lda #<(.value >> 8)
	sta .target+2
	lda #<(.value)
	sta .target+3
}

;;; setSRC: point SRC at .source. Burns A.
!macro setSRC .source {
	lda #<.source
	sta SRC
	lda #>.source
	sta SRC+1
}

;;; setDST: point DST at .dest. Burns A.
!macro setDST .dest {
	lda #<.dest
	sta DST
	lda #>.dest
	sta DST+1
}

;;; Print a string of bytes, as hex.
;;; Address in SRC, count in A.
;;; Burns A,Y.
prbytes:
	;; Y indexes the byte being printed; the remaining count is kept on
	;; the stack across the PRBYTE call. A count of 0 prints 256 bytes.
	;; Relies on PRBYTE leaving Y unchanged.
	ldy #0
-	pha
	lda (SRC),y
	jsr PRBYTE
	iny
	pla
	sec			; decrement with a known carry instead of
	sbc #1			; trusting whatever PRBYTE leaves in C
	bne -
	rts

;;; shatest: hash .count bytes starting at .start, then print the
;;; 20-byte digest in h0..h4 as hex. Burns everything shasum burns.
!macro shatest .start, .count {
	lda #<.start
	sta INPUT
	lda #>.start
	sta INPUT+1
	lda #>.count
	sta LENGTH+1
	lda #<.count
	sta LENGTH
	jsr shasum
	; lda #$8d
	; jsr COUT
	+setSRC h0
	lda #(h5-h0)
	jsr prbytes
}

;;; main: self-test. Hashes several prefixes of the memory at $FE00 and
;;; prints each digest; the expected digest is noted above each case.
main:
	;; Test shasum ""
	;; da39a3ee5e6b4b0d3255bfef95601890afd80709
	+shatest $fe00, $0000

	;; Test shasum FE00[:0x37]
	;; 1CF73FC6156B548A949D315120B5256245EAA33E
	+shatest $fe00, $0037

	;; Test shasum FE00[:0x100]
	;; 7B3D05347B52210065E27054FDFD0B8B699F0965
	+shatest $fe00, $0100

	;; Test shasum FE00[:0x1ff]
	;; 269CA6B0C644DAC01D908B20C10C0D5B19C52ABF
	+shatest $fe00, $01ff

	;; Test shasum FE00[:0x200]
	;; D5AC71D5EE76E31CC82CF5136151BF4CDA503601
	+shatest $fe00, $0200
	rts

;;; shasum: compute the SHA-1 digest of a block of memory.
;;; In:    INPUT  = address of the message (2 bytes)
;;;        LENGTH = length of the message in bytes (2 bytes)
;;; Out:   h0..h4 = 20-byte digest, most significant byte first
;;; Burns A,X,Y,SRC,DST,TMP1,TMP2. INPUT and LENGTH are modified.
shasum:
	;; Initialize h0..h4
	ldy #(h5-h0-1)
-	lda kh0,y
	sta h0,y
	dey
	bpl -

	;; Initialize message length (ml).
	;; Clear ml+0..ml+5; ml+6 and ml+7 are loaded right below. ml+5 has
	;; to be cleared too: the shift below rotates bits into it, and a
	;; stale value from an earlier call would corrupt the length.
	lda #0
	ldy #5
-	sta ml,y
	dey
	bpl -
	lda LENGTH
	sta ml+7
	lda LENGTH+1
	sta ml+6

	;; Message length is in bits: multiply by 8.
	ldy #3
-	asl ml+7
	rol ml+6
	rol ml+5
	dey
	bne -

	;; Initialize chunk counter
	;; ldy #0 ; already zero

	;; Negate length so we can inc instead of dec
	sec
	lda #0
	sbc LENGTH
	sta LENGTH
	lda #0
	sbc LENGTH+1
	sta LENGTH+1
	ora LENGTH
	beq .msgdone		; empty message: go straight to padding

	;; Copy the message into w one byte at a time, hashing each time
	;; 64 bytes have been collected. Y is the fill level of w.
.loop
	lda (INPUT),y
	sta w,y
	iny
	cpy #$40
	bne +
	;; Call do_chunk
	jsr do_chunk
	ldy #0
	clc
	lda INPUT
	adc #$40
	sta INPUT
	bcc +
	inc INPUT+1
+	inc LENGTH
	bne .loop
	inc LENGTH+1
	bne .loop

	;; Padding: a single $80 byte, zeros up to offset $38 of a chunk,
	;; then the 8-byte message length.
.msgdone:
	lda #$80
	sta w,y
	iny
	cpy #$40
	bne .zeros
	jsr do_chunk
	ldy #0
.zeros
	cpy #$38
	beq .length
	lda #0
	sta w,y
	iny
	cpy #$40
	bne .zeros
	;; No room left for the length in this chunk: hash it and keep
	;; zero-filling in a fresh one.
	jsr do_chunk
	ldy #0
	jmp .zeros
.length
	ldy #7
-	lda ml,y
	sta w+$38,y
	dey
	bpl -
	jsr do_chunk
	rts

;;; do_chunk processes a chunk of input: the 64 bytes in w are mixed
;;; into h0..h4. It burns A,X,Y,SRC,DST,TMP1,TMP2 and overwrites w.
do_chunk:
	;; Copy a..e from h0..h4
	ldy #(f-a-1)
-	lda h0,y
	sta a,y
	dey
	bpl -

	ldy #0 ; y is index into w

	;; Each group of 20 rounds is split around a call to fill, which
	;; swaps in the next 16 schedule words and resets Y to 0.

	;; First 20: k1
	+set32 k, k1
	ldx #16
-	jsr kind1
	dex
	bne -
	jsr fill
	ldx #4
-	jsr kind1
	dex
	bne -

	;; Second 20: k2
	+set32 k, k2
	ldx #12
-	jsr kind2
	dex
	bne -
	jsr fill
	ldx #8
-	jsr kind2
	dex
	bne -

	;; Third 20: k3
	+set32 k, k3
	ldx #8
-	jsr kind3
	dex
	bne -
	jsr fill
	ldx #12
-	jsr kind3
	dex
	bne -

	;; Fourth 20: k4
	+set32 k, k4
	ldx #4
-	jsr kind2
	dex
	bne -
	jsr fill
	ldx #16
-	jsr kind2
	dex
	bne -

	;; h0..h4 += a..e, one word at a time. Each pointer is advanced as a
	;; full 16-bit value with its own clc, so the result does not depend
	;; on where the data happens to be assembled.
	+setSRC a
	+setDST h0
	ldx #5
-	jsr add32
	clc
	lda SRC
	adc #4
	sta SRC
	bcc +
	inc SRC+1
+	clc
	lda DST
	adc #4
	sta DST
	bcc +
	inc DST+1
+	dex
	bne -
	rts

;;; kind1/kind2/kind3: perform one round. Each computes its own boolean
;;; function into f and then continues at common.
;;; In:  Y = byte offset of w[i] within w.
;;; Out: Y advanced by 4, X preserved. Burns A,SRC,DST,TMP1,TMP2.
kind1:
	sty TMP1
	stx TMP2
	;; f = d xor (b and (c xor d))
	+setDST f
	+setSRC d
	jsr cp32
	+setSRC c
	jsr xor32
	+setSRC b
	jsr and32
	+setSRC d
	jsr xor32
	jmp common

kind2:
	sty TMP1
	stx TMP2
	;; f = b xor c xor d
	+setDST f
	+setSRC d
	jsr cp32
	+setSRC c
	jsr xor32
	+setSRC b
	jsr xor32
	jmp common

kind3:
	sty TMP1
	stx TMP2
	;; f = (b and c) or (d and (b or c))
	+setSRC c
	+setDST f
	jsr cp32		; f = c
	+setDST temp
	jsr cp32		; temp = c
	+setSRC b
	jsr and32		; temp = b and c
	+setDST f
	jsr or32		; f = b or c
	+setSRC d
	jsr and32		; f = d and (b or c)
	+setSRC temp
	jsr or32		; f = f or temp
	; jmp common		; falls through

;;; common: shared tail of every round. Expects f computed, Y saved in
;;; TMP1 and X saved in TMP2.
common:
	;; temp = (a leftrotate 5) + f + e + k + w[i]
	+setDST temp
	+setSRC a
	jsr cp32
	jsr rol8		; left 8, then right 3 = left 5
	jsr ror1
	jsr ror1
	jsr ror1
	+setSRC f
	jsr add32
	+setSRC e
	jsr add32
	+setSRC k
	jsr add32
	;; SRC = address of w[i] = w + saved Y, then add it in.
	clc
	lda TMP1
	adc #<w
	sta SRC
	lda #>w
	adc #0
	sta SRC+1
	jsr add32

	;; e = d
	+setSRC d
	+setDST e
	jsr cp32
	;; d = c
	+setSRC c
	+setDST d
	jsr cp32
	;; c = b leftrotate 30 (done as two right rotates)
	+setSRC b
	+setDST c
	jsr cp32
	jsr ror1
	jsr ror1
	;; b = a
	+setSRC a
	+setDST b
	jsr cp32
	;; a = temp
	+setSRC temp
	+setDST a
	jsr cp32

	;; Restore the caller's registers and step Y to the next word.
	ldy TMP1
	ldx TMP2
	iny
	iny
	iny
	iny
	rts

;; Replace w[i:i+16] with w[i+16:i+32]. Burns a,x. Sets y=0.
fill:
	;; Builds the next 16 schedule words in w_next from the 16 words in w,
	;;     new[i] = (w[i-16] xor w[i-14] xor w[i-8] xor w[i-3]) leftrotate 1
	;; then copies w_next over w. Burns A and X, returns with Y = 0, and
	;; does not touch SRC or DST.
	;;
	;; Everything is addressed as w+offset,x, so no pointer arithmetic is
	;; needed and page boundaries do not matter. The w[i-3] term reads
	;; words that were just written to w_next, which only works when
	;; w_next directly follows w; that is checked at assembly time.
!if w_next != w + 64 {
	!error "fill: w_next must start exactly 64 bytes after w"
}
	ldx #0			; x = byte offset of the word being built
--	ldy #4			; four bytes per word
-	lda w,x			;   w[i-16]
	eor w+2*4,x		; ^ w[i-14]
	eor w+8*4,x		; ^ w[i-8]
	eor w+13*4,x		; ^ w[i-3]
	sta w_next,x
	inx
	dey
	bne -
	;; x now points just past the new word. Rotate the word left by one
	;; bit: fetch its top bit into carry, then rol from the last (least
	;; significant) byte up to the first.
	lda w_next-4,x
	asl
	rol w_next-1,x
	rol w_next-2,x
	rol w_next-3,x
	rol w_next-4,x
	cpx #w_next-w
	bne --

	;; Make the new words the current ones.
	ldx #w_next-w-1
-	lda w_next,x
	sta w,x
	dex
	bpl -
	ldy #0
	rts

;;; 32-bit, big-endian math routines.
;;; Result goes in DST. Second operand (if any)
;;; comes from SRC.

;; Rotate DST left by one bit. Burns a,y.
rol1:
	ldy #0
	lda (DST),y
	rol			; only to get the top bit of the word into carry
	ldy #3
-	lda (DST),y
	rol
	sta (DST),y
	dey
	bpl -
	rts

;; Rotate DST right by one bit. Burns a,y.
ror1:
	ldy #3
	lda (DST),y
	ror			; only to get the bottom bit of the word into carry
	ldy #0
	php			; cpy below destroys carry, so it is kept on the stack
-	lda (DST),y
	plp
	ror
	php
	sta (DST),y
	iny
	cpy #4
	bne -
	plp
	rts

;; Xor SRC into DST. Burns a,y.
xor32:
	ldy #3
-	lda (SRC),y
	eor (DST),y
	sta (DST),y
	dey
	bpl -
	rts

;; Copy SRC to DST. Burns a,y.
cp32:
	ldy #3
-	lda (SRC),y
	sta (DST),y
	dey
	bpl -
	rts

;; Add SRC into DST (carry out of the top byte is dropped). Burns a,y.
add32:
	clc
	ldy #3
-	lda (SRC),y
	adc (DST),y
	sta (DST),y
	dey
	bpl -
	rts

;; And SRC into DST. Burns a,y. Carry is left untouched.
and32:
	ldy #3
-	lda (SRC),y
	and (DST),y
	sta (DST),y
	dey
	bpl -
	rts

;; Or SRC into DST. Burns a,y. Carry is left untouched.
or32:
	ldy #3
-	lda (SRC),y
	ora (DST),y
	sta (DST),y
	dey
	bpl -
	rts

;; Rotate DST left by 8 bits: every byte moves one place towards the
;; start of the word and the first byte wraps to the end. Burns a,x,y.
rol8:
	ldy #0
	lda (DST),y
	tax			; keep the first byte for the wrap-around
-	iny
	lda (DST),y
	dey
	sta (DST),y
	iny
	cpy #3
	bne -
	txa
	sta (DST),y
	rts

!eof

TODOs

[X] Routine to print n hex bytes (address, length (byte))
[X] Routine to get the next 16 values (64 bytes) of input
[X] Routine to get w[i] one i at a time, and rebuild next 16 values

Needed arithmetic routines for sha1sum:
- [X] add32
- [X] and32
- [X] or32
- [X] xor32
- [X] ROL1
- [X] ROR1
- [X] ROL5 --> ROL8, (ROR1,ROR1,ROR1)
- [X] ROL30 --> (ROR1,ROR1)