mirror of
https://github.com/option8/8BITCOIN.git
synced 2024-11-04 16:05:49 +00:00
2219 lines
66 KiB
ArmAsm
2219 lines
66 KiB
ArmAsm
|
DSK HASH
|
||
|
|
||
|
**************************************************
|
||
|
* Note:
|
||
|
* If the code looks inefficient and redundant, it is.
|
||
|
* Lots of things that should loop are unrolled for speed.
|
||
|
* Built with Merlin32 from Brutal Deluxe: brutaldeluxe.fr/products/crossdevtools/merlin/
|
||
|
*
|
||
|
* Timing measurements include setup, hashing and printing result.
|
||
|
*
|
||
|
* V1: 947,063 cycles.
|
||
|
*
|
||
|
* Macros: converted several common subroutines to macros
|
||
|
* for additional speed, at expense of memory size.
|
||
|
* Currently runs 1 hash in 922,394 cycles.
|
||
|
*
|
||
|
* Zero page: moved some variables to zero page from Memory
|
||
|
* Down to 916,595 cycles.
|
||
|
*
|
||
|
* Further optimized, more macros: 875,166
|
||
|
* Further optimized: 804,584
|
||
|
* Unrolled shift routines: 682,744 (Now, it's getting good.)
|
||
|
* Pulled out all the stops: 671,524 (Time to hand off to Qkumba.)
|
||
|
|
||
|
|
||
|
* What's being hashed is: version (4 bytes) + previous block hash (32) + merkle root (32) + time (4) + bits/target (4) + nonce (4) = block header (80 bytes)
|
||
|
|
||
|
* 80 bytes gets broken into two message blocks
|
||
|
|
||
|
* This gets hashed twice sha256(sha256(blockheader))
|
||
|
* https://en.wikipedia.org/wiki/SHA-2
|
||
|
|
||
|
* http://www.yogh.io/#mine:last
|
||
|
* https://en.bitcoin.it/wiki/Block_hashing_algorithm
|
||
|
* https://en.bitcoin.it/wiki/Protocol_documentation
|
||
|
|
||
|
* total time to run a complete hash (three SHA256 iterations total): 1,994,362 cycles
|
||
|
* more loops to macros (LUP): 1,906,858
|
||
|
* 1,559,735
|
||
|
* Qkumba optimization, part 1: 1,248,727
|
||
|
* 1,238,169
|
||
|
* 1,218,659
|
||
|
* added nonce display, unrolled a couple of loops 1,210,817
|
||
|
* where next? PRBYTE is 255
|
||
|
* CROUT is 3,248 (!)
|
||
|
* printing the result is 19,396 cycles alone!
|
||
|
* down to 11,455 now
|
||
|
* RORx8 into Byte 1,096,763
|
||
|
* more macros. 1,068,543
|
||
|
*
|
||
|
* instead of CROUTx2, set cursor to top of window and print over top? 1,055,753
|
||
|
* whoa 929,375
|
||
|
* rotating Vn macro 891,692
|
||
|
* MAJ and CHOICE optimized 831,999
|
||
|
* XOR32 to combined macro 796,115
|
||
|
* 780,145
|
||
|
* unrolled HASHTOMESSAGE 779,427 = 103.765 years
|
||
|
* who needs 1 VTAB? 779,255
|
||
|
* or 2 CROUTs? 778,751
|
||
|
*
|
||
|
* LDVV - MACROS with parameters. 775,323
|
||
|
* macros with parameters? Mind. Blown.
|
||
|
* target: 751,237 = 1 full 2^32 hashes in 100 years.
|
||
|
* unrolled COPYCHUNK1 714,904
|
||
|
* replaced prbyte with my own: 708,036
|
||
|
* special temp/s0 macros: 662,220
|
||
|
* LDAW, LDWADDX, LDS, inline JSRs 627,718
|
||
|
|
||
|
**************************************************
|
||
|
* Variables
|
||
|
**************************************************
|
||
|
|
||
|
INPUT32 EQU $E0 ; DS 4 ; 32-bit Accumulator
|
||
|
XREGISTER32 EQU $E4 ; DS 4 ; input 1 for XOR, etc (X)
|
||
|
YREGISTER32 EQU $E8 ; DS 4 ; input 2 for MAJ, etc (Y)
|
||
|
RESULT32 EQU $EC ; DS 4 ; temp storage for various operations
|
||
|
|
||
|
CURRENTCHUNK EQU $FF ; chunk zero or one.
|
||
|
HASHPASS EQU $FE ; pass zero or one.
|
||
|
|
||
|
CURRENTMESSAGELO EQU $FC ; low byte of the pointer to the message being hashed (copied to $00 by COPYCHUNKS)
|
||
|
CURRENTMESSAGEHI EQU $FD ; high byte of that pointer (copied to $01 by COPYCHUNKS)
|
||
|
|
||
|
S0 EQU $80 ; 4 bytes ($80-$83), written by the STAS0 macro
|
||
|
S1 EQU $84 ; 4 bytes ($84-$87), written by the STAS1 macro
|
||
|
|
||
|
TEMP0 EQU $88 ; 4 bytes: holds s0 in EXTENDWORDS and temp1 in the compression round
|
||
|
TEMP1 EQU $8C ; 4 bytes: holds temp2 in the compression round
|
||
|
|
||
|
|
||
|
|
||
|
**************************************************
|
||
|
* Apple Standard Memory Locations
|
||
|
**************************************************
|
||
|
CLRLORES EQU $F832
|
||
|
LORES EQU $C050 ; NOTE(review): START's own comments call $C050 TXTCLR and $C056 LORES - this name looks mislabeled, confirm before using it
|
||
|
TXTSET EQU $C051
|
||
|
MIXCLR EQU $C052
|
||
|
MIXSET EQU $C053
|
||
|
TXTPAGE1 EQU $C054
|
||
|
TXTPAGE2 EQU $C055
|
||
|
KEY EQU $C000
|
||
|
C80STOREOFF EQU $C000 ; same address as KEY
|
||
|
C80STOREON EQU $C001
|
||
|
STROBE EQU $C010
|
||
|
SPEAKER EQU $C030
|
||
|
VBL EQU $C02E
|
||
|
RDVBLBAR EQU $C019 ; not VBL (VBL signal low)
|
||
|
WAIT EQU $FCA8
|
||
|
RAMWRTAUX EQU $C005
|
||
|
RAMWRTMAIN EQU $C004
|
||
|
SETAN3 EQU $C05E ; Set annunciator-3 output to 0
|
||
|
SET80VID EQU $C00D ; enable 80-column display mode (WR-only)
|
||
|
HOME EQU $FC58 ; clear the text screen
|
||
|
VTAB EQU $FC22 ; Sets the cursor vertical position (from CV)
|
||
|
COUT EQU $FDED ; Calls the output routine whose address is stored in CSW
|
||
|
;COUTI EQU $fbf0 ; normally COUTI
|
||
|
CROUT EQU $FD8E ; prints CR
|
||
|
|
||
|
STROUT EQU $DB3A ;Y=String ptr high, A=String ptr low
|
||
|
PRBYTE EQU $FDDA ; print hex byte in A
|
||
|
|
||
|
ALTTEXT EQU $C055 ; same address as TXTPAGE2
|
||
|
ALTTEXTOFF EQU $C054 ; same address as TXTPAGE1
|
||
|
|
||
|
|
||
|
PB0 EQU $C061 ; paddle 0 button. high bit set when pressed.
|
||
|
PDL0 EQU $C064 ; paddle 0 value, or should I use PREAD?
|
||
|
PREAD EQU $FB1E
|
||
|
|
||
|
ROMINIT EQU $FB2F
|
||
|
ROMSETKBD EQU $FE89
|
||
|
ROMSETVID EQU $FE93
|
||
|
|
||
|
ALTCHAR EQU $C00F ; enables alternative character set - mousetext
|
||
|
|
||
|
CH EQU $24 ; cursor Horiz
|
||
|
CV EQU $25 ; cursor Vert
|
||
|
|
||
|
WNDWDTH EQU $21 ; Width of text window
|
||
|
WNDTOP EQU $22 ; Top of text window
|
||
|
|
||
|
**************************************************
|
||
|
* START - sets up various fiddly zero page bits
|
||
|
**************************************************
|
||
|
|
||
|
* START - program entry: clear the screen, select the display mode, draw the
* splash graphics, then park the text window at row $14.
            ORG   $2000             ; program data starts at $2000

            JSR   HOME              ; clear the text screen

* Lo-res page 1, mixed graphics + text. A is not loaded before these stores,
* so presumably only the access to each address matters, not the value.
            STA   $C050             ; rw:TXTCLR
            STA   MIXSET            ; rw:MIXSET   ($C053)
            STA   TXTPAGE1          ; rw:TXTPAGE1 ($C054)
            STA   $C056             ; rw:LORES

            JSR   FILLSCREENFAST    ; blanks screen to black
            JSR   SPLASHSCREEN      ; fancy lo-res graphics

            JSR   FLIPCOIN

* Set the text window to the last 4 lines of the GR screen.
            LDA   #$14
            STA   WNDTOP            ; window starts at row $14
            STA   CV                ; cursor on that same row
            JSR   VTAB              ; position the cursor from CV
|
||
|
|
||
|
**************************************************
|
||
|
* SETUP
|
||
|
**************************************************
|
||
|
*
|
||
|
* Initialize hash values:
|
||
|
* (first 32 bits of the fractional parts of the square roots of the first 8 primes 2..19):
|
||
|
* See HTABLE
|
||
|
*
|
||
|
* Initialize array of round constants:
|
||
|
* (first 32 bits of the fractional parts of the cube roots of the first 64 primes 2..311):
|
||
|
* See KTABLE
|
||
|
*
|
||
|
* Pre-processing (Padding):
|
||
|
* begin with the original message of length L bits (80*8 = 640bits)
|
||
|
* append a single '1' bit (641bits)
|
||
|
* (byte-aligned, that is one extra $80 byte after the header, giving 81 bytes)
|
||
|
* append K '0' bits, where K is the minimum number >= 0 such that L + 1 + K + 64 is a multiple of 512 (640+1+K+64=1024 K=319)
|
||
|
* append L as a 64-bit big-endian integer, making the total post-processed length a multiple of 512 bits (append 0000000000000280)
|
||
|
|
||
|
**************************************************
|
||
|
* Pre-processing (Padding):
|
||
|
**************************************************
|
||
|
; Start with MESSAGE padded out to 1024bits (see MESSAGE below)
|
||
|
|
||
|
* Process the message in successive 512-bit chunks:
|
||
|
* break message into 512-bit chunks
|
||
|
|
||
|
* 80byte header yields 1024bit message, so chunks = 2
|
||
|
|
||
|
* PREPROCESS - start of one complete double hash. Points CURRENTMESSAGE at the
* address held in MESSAGELO/MESSAGEHI and resets the pass and chunk counters.
* Falls through into INITIALIZEHASH. Clobbers A.
PREPROCESS
            LDA   MESSAGELO
            STA   CURRENTMESSAGELO
            LDA   MESSAGEHI
            STA   CURRENTMESSAGEHI

            LDA   #$00
            STA   CURRENTCHUNK      ; chunk the first = 0
            STA   HASHPASS          ; pass the first = 0
|
||
|
|
||
|
* INITIALIZEHASH - copy the 32 bytes at INITIALHASH into H00..H00+31.
* Entered by fall-through from PREPROCESS and by JMP from HASHTOMESSAGE.
* Unrolled at assembly time (32 LDA/STA pairs). Clobbers A.
INITIALIZEHASH

INITIALHASHES
]hashnumber =     0
            LUP   32
            LDA   INITIALHASH+]hashnumber
            STA   H00+]hashnumber
]hashnumber =     ]hashnumber+1
            --^
|
||
|
|
||
|
* for each chunk
|
||
|
* create a 64-entry message schedule array w[0..63] of 32-bit words
|
||
|
* (The initial values in w[0..63] don't matter, so many implementations zero them here)
|
||
|
|
||
|
* See WTABLE
|
||
|
|
||
|
* copy chunk into first 16 words w[0..15] of the message schedule array
|
||
|
|
||
|
* COPYCHUNKS - load the 64-byte chunk selected by CURRENTCHUNK into W00..W00+63.
*   CURRENTCHUNK = 0 : copy the first 64 bytes at the CURRENTMESSAGE pointer,
*                      then set CURRENTCHUNK to HASHPASS, so the single-chunk
*                      second pass is already marked as finished.
*   CURRENTCHUNK <> 0: copy bytes 64..127 of MESSAGE (first pass only).
* Chunk 0 leaves via JMP EXTENDWORDS; chunk 1 runs off the end of this block.
* Clobbers A, Y, $00/$01.
COPYCHUNKS  LDA   CURRENTCHUNK      ; which chunk?
            BNE   NEXTCHUNK         ; chunk 0 already done, go do chunk 1

            LDA   CURRENTMESSAGELO
            STA   $00
            LDA   CURRENTMESSAGEHI
            STA   $01               ; ($00) -> message being hashed

            LDY   #$3F              ; bytes 63 down to 0
COPYCHUNK0  LDA   ($00),Y
            STA   W00,Y
            DEY
            BPL   COPYCHUNK0        ; stop once Y wraps to $FF

* CURRENTCHUNK is zero on this path, so CURRENTCHUNK + HASHPASS is HASHPASS.
            LDA   HASHPASS
            STA   CURRENTCHUNK

            JMP   EXTENDWORDS       ; done with chunk 0

NEXTCHUNK
            LDA   CURRENTMESSAGELO
            STA   $00
            LDA   CURRENTMESSAGEHI
            STA   $01

* Second chunk is only copied on the first pass, so it is read straight from
* MESSAGE (never MESSAGE2) with absolute addressing.
]chunkbyte  =     0
COPYCHUNK1  LUP   64
            LDA   MESSAGE+64+]chunkbyte
            STA   W00+]chunkbyte
]chunkbyte  =     ]chunkbyte+1
            --^
|
||
|
|
||
|
**** Only does this (second chunk) on first pass.
|
||
|
|
||
|
|
||
|
|
||
|
* Extend the first 16 words into the remaining 48 words w[16..63] of the message schedule array:
|
||
|
|
||
|
* for i from 16 to 63
|
||
|
* s0 = (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
|
||
|
* s0 = (XREGISTER32) xor (YREGISTER32) xor (INPUT32)
|
||
|
|
||
|
* EXTENDWORDS - extend w[0..15] into w[16..63] of the message schedule:
*
*   s0   = (w[i-15] rightrotate 7) xor (w[i-15] rightrotate 18) xor (w[i-15] rightshift 3)
*   s1   = (w[i-2] rightrotate 17) xor (w[i-2] rightrotate 19) xor (w[i-2] rightshift 10)
*   w[i] = w[i-16] + s0 + w[i-7] + s1
*
* The loop counter i lives on the stack; each use peeks it with PLA/PHA.
* Word index to byte offset is done with ASL/ASL. The largest index scaled
* here is 61, so the result fits in 8 bits and carry is left clear.
* The 32-bit helper macros (RIGHTROTATE32, RIGHTROTATE8, RIGHTSHIFT32,
* RIGHTSHIFT8, TAX32, TAY32, XORAXY32, ADC32, LDWADDX) are defined elsewhere;
* notes on them below repeat the original author's comments.
* Exits with JMP INITIALIZE once i reaches 64.
EXTENDWORDS
            LDA   #$0F              ; 15: EXTEND increments before use, so first i = 16
            PHA

EXTEND      PLA                     ; pull i
            CLC
            ADC   #$01              ; i = i + 1
            CMP   #$40              ; reached 64?

            BNE   EXTEND2           ; no: build w[i]
            JMP   INITIALIZE        ; yes: w[16..63] are done

EXTEND2     PHA                     ; new i back on the stack
            SEC
            SBC   #$0F              ; i-15
            ASL
            ASL                     ; (i-15)*4
            TAX                     ; X = byte offset of w[i-15]

            LDAW                    ; INPUT32 = w[i-15]

RIGHTROTATE7 LUP  7
            RIGHTROTATE32           ; 7 rotate steps
            --^

            TAX32                   ; partial result to XREGISTER32

RIGHTROTATE18 RIGHTROTATE8
            LUP   3
            RIGHTROTATE32           ; 8 + 3 = 11 more, 18 in total
            --^

            TAY32                   ; partial result to YREGISTER32

* X still holds the byte offset of w[i-15]
            LDAW                    ; INPUT32 = w[i-15] again

RIGHTSHIFT3 LUP   3
            RIGHTSHIFT32            ; shift right, bits fall off the end
            --^

* s0 = (XREGISTER32) xor (YREGISTER32) xor (INPUT32)
            XORAXY32

            STATEMP0                ; TEMP0 = s0

            PLA                     ; peek i
            PHA
            SEC
            SBC   #$02              ; i-2
            ASL
            ASL                     ; (i-2)*4
            TAX                     ; X = byte offset of w[i-2]

            LDAW                    ; INPUT32 = w[i-2]

RIGHTROTATE17 RIGHTROTATE8
            RIGHTROTATE8
            RIGHTROTATE32           ; 8 + 8 + 1 = 17
            TAX32                   ; partial result to XREGISTER32

RIGHTROTATE2 LUP  2
            RIGHTROTATE32           ; 2 more, 19 in total
            --^
            TAY32                   ; partial result to YREGISTER32

* X still holds the byte offset of w[i-2]
            LDAW                    ; INPUT32 = w[i-2] again

RIGHTSHIFT10 RIGHTSHIFT8
            LUP   2
            RIGHTSHIFT32            ; 8 + 2 = 10
            --^

* s1 = (XREGISTER32) xor (YREGISTER32) xor (INPUT32)
            XORAXY32

* w[i] = w[i-16] + s0 + w[i-7] + s1
            TAX32                   ; s1 to XREGISTER32

            LDATEMP0                ; INPUT32 = s0

            CLC
            ADC32                   ; s0 + s1
            TAX32                   ; running sum to XREGISTER32

            PLA                     ; peek i
            PHA
            SEC
            SBC   #$10              ; i-16
            TAX                     ; X = word index (not scaled by 4)

            LDWADDX                 ; takes X: add w[i-16] to the running sum

            TAX32                   ; running sum to XREGISTER32

            PLA                     ; peek i
            PHA
            SEC
            SBC   #$07              ; i-7
            TAX                     ; X = word index (not scaled by 4)

            LDWADDX                 ; takes X: add w[i-7]

            PLA                     ; peek i
            PHA                     ; A = i for LDWSTA32

STOREW      LDWSTA32                ; w[i] = INPUT32

            JMP   EXTEND            ; repeat until i = 63 is stored
|
||
|
|
||
|
* INITIALIZE / HASHTOV - load the working variables from the current hash:
*   Va := h00   Vb := h01   Vc := h02   Vd := h03
*   Ve := h04   Vf := h05   Vg := h06   Vh := h07
* Done as one unrolled 32-byte copy from H00 to VA. Clobbers A.
INITIALIZE

HASHTOV
]bytenumber =     31
HTOV        LUP   32
            LDA   H00+]bytenumber
            STA   VA+]bytenumber
]bytenumber =     ]bytenumber-1
            --^
|
||
|
|
||
|
**************************************************
|
||
|
* MAIN LOOP. OPTIMIZE THIS.
|
||
|
**************************************************
|
||
|
|
||
|
COMPRESSION
|
||
|
|
||
|
* Compression function main loop:
|
||
|
* for i from 0 to 63
|
||
|
|
||
|
LDA #$00 ; round counter i starts at 0
|
||
|
COMPRESS PHA ; round number to stack
|
||
|
|
||
|
* S1 := (e rightrotate 6) xor (e rightrotate 11) xor (e rightrotate 25)
|
||
|
|
||
|
LDVLDA 4 ; INPUT32 := word 4 of the V table (Ve)
|
||
|
|
||
|
RIGHTROTATE06 LUP 6
|
||
|
RIGHTROTATE32 ; one rotate step per LUP pass: 6 in total
|
||
|
--^
|
||
|
TAX32 ; result in XREGISTER32
|
||
|
|
||
|
RIGHTROTATE11 LUP 5
|
||
|
RIGHTROTATE32 ; 5 more rotate steps = 11 in total
|
||
|
--^
|
||
|
TAY32 ; result in YREGISTER32
|
||
|
|
||
|
RIGHTROTATE25 RIGHTROTATE8
|
||
|
LUP 6
|
||
|
RIGHTROTATE32 ; RIGHTROTATE8 (8 steps, per its name) plus these 6 = 14 more, 25 in total
|
||
|
--^
|
||
|
|
||
|
* S1 := (XREGISTER32) xor (YREGISTER32) xor (INPUT32)
|
||
|
|
||
|
XORAXY32
|
||
|
|
||
|
|
||
|
;S1
|
||
|
STAS1 ; store INPUT32 in S1
|
||
|
|
||
|
|
||
|
**** CHOICE and MAJ always take the same 3 arguments - make macros
|
||
|
|
||
|
* ch := (e and f) xor ((not e) and g)
|
||
|
|
||
|
CHOICE32
|
||
|
; CH in INPUT32
|
||
|
* temp1 := Vh + S1 + ch + k[i] + w[i] = TEMP0
|
||
|
|
||
|
; S1 + CH
|
||
|
LDSADC32 4 ; (S1 + ch) in INPUT32
|
||
|
|
||
|
; + VH
|
||
|
LDVHADC32
|
||
|
|
||
|
PLA ; pull i from stack
|
||
|
PHA ; back in stack
|
||
|
TAX ; X = i for LDKADC32
|
||
|
LDKADC32 ; K[i] in pointer
|
||
|
; + K[i]
|
||
|
|
||
|
PLA ; pull i from stack
|
||
|
PHA ; back in stack
|
||
|
TAX ; X = i for LDWADC
|
||
|
LDWADC ; W[i] in pointer
|
||
|
; + W[i]
|
||
|
; LDXADC32 ; (S1 + ch + VH + k[i] + w[i]) in INPUT32
|
||
|
|
||
|
; = TEMP0
|
||
|
STATEMP0 ; store temp1 at TEMP0
|
||
|
|
||
|
|
||
|
|
||
|
* S0 := (a rightrotate 2) xor (a rightrotate 13) xor (a rightrotate 22)
|
||
|
|
||
|
LDVLDA 0 ; INPUT32 := word 0 of the V table (Va)
|
||
|
|
||
|
RIGHTROTATE02 LUP 2
|
||
|
RIGHTROTATE32 ; ROR 2 times
|
||
|
--^
|
||
|
TAX32 ; result in XREGISTER32
|
||
|
|
||
|
RIGHTROTATE13 RIGHTROTATE8
|
||
|
LUP 3
|
||
|
RIGHTROTATE32 ; ROR 11 more times=13
|
||
|
--^
|
||
|
TAY32 ; result in YREGISTER32
|
||
|
|
||
|
RIGHTROTATE22 RIGHTROTATE8
|
||
|
RIGHTROTATE32 ; ROR 9 more times=22
|
||
|
|
||
|
|
||
|
* S0 := (XREGISTER32) xor (YREGISTER32) xor (INPUT32)
|
||
|
|
||
|
XORAXY32
|
||
|
|
||
|
;S0
|
||
|
STAS0 ; store INPUT32 in S0
|
||
|
|
||
|
|
||
|
**** CHOICE and MAJ always take the same 3 arguments - make macros
|
||
|
|
||
|
* maj := (a and b) xor (a and c) xor (b and c)
|
||
|
; load A,B,C into A32,X32,Y32
|
||
|
MAJ32 ; MAJ in INPUT32
|
||
|
|
||
|
* temp2 := S0 + maj
|
||
|
* temp2 := S0 + INPUT32
|
||
|
; load S0 into X32
|
||
|
;S0 -> X32
|
||
|
LDA STABLELO ; first STABLELO entry; this load is not indexed by X
|
||
|
STA $00
|
||
|
LDA STABLEHI
|
||
|
STA $01 ; ($00) now holds the first STABLE pointer - S0, going by the note above
|
||
|
LDX32 ; S0 in XREGISTER32
|
||
|
|
||
|
CLC
|
||
|
ADC32 ; TEMP2 in INPUT32
|
||
|
|
||
|
;A32 -> TEMP1
|
||
|
STATEMP1 ; temp2 to TEMP1
|
||
|
|
||
|
|
||
|
|
||
|
ROTATE
|
||
|
|
||
|
* Vh := Vg
|
||
|
* Vg := Vf
|
||
|
* Vf := Ve
|
||
|
|
||
|
; Store VG in VH (highest word first, so nothing is overwritten before it is copied)
|
||
|
VXTOVY 6;7
|
||
|
|
||
|
VXTOVY 5;6
|
||
|
|
||
|
VXTOVY 4;5
|
||
|
|
||
|
* Ve := Vd + temp1
|
||
|
|
||
|
LDVLDA 3 ; INPUT32 := word 3 of the V table (Vd)
|
||
|
|
||
|
;TEMP0 -> X32
|
||
|
LDX TEMPLO ; X is used as the scratch register here, so A is not touched
|
||
|
STX $00
|
||
|
LDX TEMPHI
|
||
|
STX $01 ; now word/pointer at $0+$1 points to TEMP0
|
||
|
|
||
|
LDXADC32
|
||
|
|
||
|
LDVSTA 4 ; word 4 of the V table (Ve) := INPUT32
|
||
|
|
||
|
|
||
|
|
||
|
* Vd := Vc
|
||
|
* Vc := Vb
|
||
|
* Vb := Va
|
||
|
|
||
|
VXTOVY 2;3
|
||
|
|
||
|
VXTOVY 1;2
|
||
|
|
||
|
VXTOVY 0;1
|
||
|
|
||
|
* Va := temp1 + temp2
|
||
|
|
||
|
;TEMP1 -> X32
|
||
|
LDX TEMPLO+1 ; second table entry
|
||
|
STX $00
|
||
|
LDX TEMPHI+1
|
||
|
STX $01 ; now word/pointer at $0+$1 points to TEMP1
|
||
|
|
||
|
LDX32 ; load TEMP1 into XREGISTER32
|
||
|
|
||
|
;TEMP0 -> A32
|
||
|
LDATEMP0
|
||
|
CLC
|
||
|
ADC32
|
||
|
|
||
|
LDVSTA 0 ; word 0 of the V table (Va) := INPUT32
|
||
|
|
||
|
COMPRESSLOOP PLA ; Round 0-63 from stack
|
||
|
CLC
|
||
|
ADC #$01
|
||
|
CMP #$40
|
||
|
BEQ ADDHASH ; round 63 just finished: all 64 rounds done
|
||
|
|
||
|
JMP COMPRESS ; next round; COMPRESS pushes the new counter
|
||
|
**************************************************
|
||
|
* END MAIN LOOP.
|
||
|
|
||
|
* FINALIZE HASH AND OUTPUT.
|
||
|
**************************************************
|
||
|
|
||
|
* ADDHASH - add the compressed chunk to the current hash value:
*   h0 := h0 + Va   h1 := h1 + Vb   h2 := h2 + Vc   h3 := h3 + Vd
*   h4 := h4 + Ve   h5 := h5 + Vf   h6 := h6 + Vg   h7 := h7 + Vh
* Eight unrolled 32-bit additions. Within each word the byte at offset +3 is
* added first and the carry ripples up to offset +0; carry is cleared at the
* start of every word, so nothing spills from one word into the next.
* Clobbers A and the flags.
ADDHASH

]varbyte    =     0
            LUP   8

            CLC
            LDA   H00+]varbyte+3
            ADC   VA+]varbyte+3
            STA   H00+]varbyte+3

            LDA   H00+]varbyte+2
            ADC   VA+]varbyte+2
            STA   H00+]varbyte+2

            LDA   H00+]varbyte+1
            ADC   VA+]varbyte+1
            STA   H00+]varbyte+1

            LDA   H00+]varbyte
            ADC   VA+]varbyte
            STA   H00+]varbyte
]varbyte    =     ]varbyte+4
            --^
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
* CHECKCHUNK / CHECKPASS - decide what follows a finished chunk.
*   CURRENTCHUNK = 0 : set it to 1 and go hash the second chunk.
*   otherwise        : the message is finished for this pass;
*       HASHPASS = 0 : fall into HASHTOMESSAGE to set up the second pass
*       HASHPASS <> 0: both passes are done, JMP DIGEST.
CHECKCHUNK  LDA   CURRENTCHUNK
            BNE   CHECKPASS         ; this message is finished
            INC   CURRENTCHUNK      ; chunk 0 just finished: now do chunk 1
            JMP   COPYCHUNKS

CHECKPASS   LDA   HASHPASS
            BEQ   INCHASHPASS       ; pass 0 finished: hash the hash
            JMP   DIGEST            ; pass 1 finished: print the result

INCHASHPASS INC   HASHPASS          ; now on pass 1

* HASHTOMESSAGE - copy the 32 hash bytes at H00 into MESSAGE2, point
* CURRENTMESSAGE at the address held in MESSAGE2LO/MESSAGE2HI, reset the chunk
* counter (only one chunk is needed for MESSAGE2), and restart from
* INITIALIZEHASH so pass 2 begins with the initial hash values. Clobbers A.
HASHTOMESSAGE

COPYHASH
]hashbyte   =     0
            LUP   32
            LDA   H00+]hashbyte
            STA   MESSAGE2+]hashbyte
]hashbyte   =     ]hashbyte+1
            --^

            LDA   #$00
            STA   CURRENTCHUNK

            LDA   MESSAGE2LO
            STA   CURRENTMESSAGELO
            LDA   MESSAGE2HI
            STA   CURRENTMESSAGEHI

            JMP   INITIALIZEHASH    ; re-initializes the original sqrt hash values for pass 2
|
||
|
|
||
|
DIGEST ; both passes done: print the nonce, then the hash bytes at H00
|
||
|
|
||
|
LDA #$14
|
||
|
STA CV
|
||
|
LDA #$00
|
||
|
STA CH
|
||
|
LDA #$06 ; set the memory location for line $14.
|
||
|
STA $29 ; high byte: $29/$28 together hold $0650
|
||
|
LDA #$50 ; low byte
|
||
|
STA $28 ; set directly here; VTAB is not called
|
||
|
|
||
|
PRNONCE
|
||
|
]hashbyte = 0
|
||
|
LUP 4
|
||
|
LDA NONCE + ]hashbyte ; bytes NONCE+0 .. NONCE+3, in that order
|
||
|
PRHEX ; PRBYTE - clobbers Y
|
||
|
;**** ROLL MY OWN?
|
||
|
]hashbyte = ]hashbyte + 1
|
||
|
--^
|
||
|
|
||
|
INCNONCE ; NOTE(review): despite the label, no instruction in this section modifies NONCE - confirm where (or whether) it is incremented
|
||
|
|
||
|
|
||
|
|
||
|
INC CV ; down one line
|
||
|
INC CV ; down one line
|
||
|
LDA #$00
|
||
|
STA CH ; left cursor
|
||
|
INC $29 ; 0650 -> 0750
|
||
|
LDA #$50 ; low byte is written again as $50
|
||
|
STA $28 ; $29/$28 now hold $0750
|
||
|
|
||
|
|
||
|
PRDIGEST
|
||
|
LDA H00
|
||
|
BNE PRBYTE1 ; first hash byte non-zero: just print it
|
||
|
; if zero, spin the coin
|
||
|
JSR FLIPCOIN
|
||
|
LDA H00 ; reload the byte to print after the call
|
||
|
PRBYTE1 PRHEX
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
]hashbyte = 1
|
||
|
LUP 19 ; bytes 1..19: with byte 0 above, 20 bytes on this row
|
||
|
LDA H00 + ]hashbyte
|
||
|
PRHEX
|
||
|
|
||
|
]hashbyte = ]hashbyte + 1
|
||
|
--^
|
||
|
|
||
|
|
||
NEXTLINE LDA #$00
|
||
|
STA CH
|
||
|
INC CV
|
||
|
LDA #$D0
|
||
|
STA $28 ; $0750 to $07D0
|
||
|
|
||
|
]hashbyte = 20
|
||
|
LUP 12 ; bytes 20..31: the remaining 12 bytes
|
||
|
LDA H00 + ]hashbyte
|
||
|
PRHEX
|
||
|
|
||
|
]hashbyte = ]hashbyte + 1
|
||
|
--^
|
||
|
|
||
|
JMP PREPROCESS ; start over. NOTE(review): the intended "INC NONCE" is not visible anywhere above - as listed, the same header would be hashed again; confirm
|
||
|
|
||
|
|
||
|
|
||
|
|
||
|
DONEWORK ; processed all the 2^32 nonce values. WTF?
|
||
|
|
||
|
RTS ; NOTE(review): nothing in the visible listing branches or jumps to DONEWORK - confirm it is reachable
|
||
|
|
||
|
|
||
|
|
||
|
**************************************************
|
||
|
* macros (expanded at assembly time)
|
||
|
**************************************************
|
||
|
|
||
|
LDW MAC ; $00 := WTABLELO,X and $01 := WTABLEHI,X; clobbers A
|
||
|
LDA WTABLELO,X ; takes X as argument
|
||
|
STA $00
|
||
|
LDA WTABLEHI,X
|
||
|
STA $01 ; now word/pointer at $0+$1 points to 32bit word at WTABLE,X
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
LDK MAC ; $00 := KTABLELO,X and $01 := KTABLEHI,X; clobbers A
|
||
|
LDA KTABLELO,X ; takes X as argument
|
||
|
STA $00
|
||
|
LDA KTABLEHI,X
|
||
|
STA $01 ; now word/pointer at $0+$1 points to 32bit word at KTABLE,X
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
; LDH MAC
|
||
|
; LDA HTABLELO,X ; takes X as argument
|
||
|
; STA $00
|
||
|
; LDA HTABLEHI,X
|
||
|
; STA $01 ; now word/pointer at $0+$1 points to 32bit word at HTABLE,X
|
||
|
; <<< ; End of Macro
|
||
|
|
||
|
LDV MAC ; $00 := VTABLELO,X and $01 := VTABLEHI,X; clobbers A
|
||
|
LDA VTABLELO,X ; takes X as argument
|
||
|
STA $00
|
||
|
LDA VTABLEHI,X
|
||
|
STA $01 ; now word/pointer at $0+$1 points to 32bit word at VTABLE,X
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
LDVV MAC ; like LDV, but the table entry is chosen by macro parameter ]1 instead of X; clobbers A
|
||
|
LDA VTABLELO+]1 ; entry ]1, fixed at assembly time (X is not used)
|
||
|
STA $00
|
||
|
LDA VTABLEHI+]1
|
||
|
STA $01 ; now word/pointer at $0+$1 holds VTABLE entry ]1
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
|
||
|
LDVLDA MAC ; INPUT32 := the 4 bytes at VA + 4*]1 (]1 is added four times); clobbers A
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +3 ; byte 3 of word ]1 (absolute address, no pointer involved)
|
||
|
STA INPUT32+3 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +2 ; byte 2
|
||
|
STA INPUT32+2 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +1 ; byte 1
|
||
|
STA INPUT32+1 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 ; byte 0 (copied last, so A ends up holding it)
|
||
|
STA INPUT32 ; store in 32 bit "accumulator"
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
LDVLDX MAC ; XREGISTER32 := the 4 bytes at VA + 4*]1; clobbers A
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +3 ; byte 3 of word ]1
|
||
|
STA XREGISTER32+3 ; store in 32 bit "X register"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +2 ; byte 2
|
||
|
STA XREGISTER32+2 ; store in 32 bit "X register"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 +1 ; byte 1
|
||
|
STA XREGISTER32+1 ; store in 32 bit "X register"
|
||
|
|
||
|
LDA VA + ]1 + ]1 + ]1 + ]1 ; byte 0
|
||
|
STA XREGISTER32 ; store in 32 bit "X register"
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
LDVSTA MAC ; the 4 bytes at VA + 4*]1 := INPUT32; clobbers A
|
||
|
LDA INPUT32+3 ; load from 32 bit "accumulator"
|
||
|
STA VA + ]1 + ]1 + ]1 + ]1 +3 ; store in byte 3 of word ]1
|
||
|
|
||
|
LDA INPUT32+2 ; load from 32 bit "accumulator"
|
||
|
STA VA + ]1 + ]1 + ]1 + ]1 +2 ; store in byte 2
|
||
|
|
||
|
LDA INPUT32+1 ; load from 32 bit "accumulator"
|
||
|
STA VA + ]1 + ]1 + ]1 + ]1 +1 ; store in byte 1
|
||
|
|
||
|
LDA INPUT32 ; load from 32 bit "accumulator"
|
||
|
STA VA + ]1 + ]1 + ]1 + ]1 ; store in byte 0
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
VXTOVY MAC ; copy word ]1 of the V table (VA + 4*]1) to word ]2 (VA + 4*]2); clobbers A
|
||
|
|
||
|
LDA VA + ]1+ ]1+ ]1+ ]1 ; byte 0 of the source word
|
||
|
STA VA + ]2+ ]2+ ]2+ ]2 ; byte 0 of the destination word
|
||
|
|
||
|
LDA VA + ]1+ ]1+ ]1+ ]1 + 1 ; byte 1
|
||
|
STA VA + ]2+ ]2+ ]2+ ]2 + 1 ; byte 1
|
||
|
|
||
|
LDA VA + ]1+ ]1+ ]1+ ]1 + 2 ; byte 2
|
||
|
STA VA + ]2+ ]2+ ]2+ ]2 + 2 ; byte 2
|
||
|
|
||
|
LDA VA + ]1+ ]1+ ]1+ ]1 + 3 ; byte 3
|
||
|
STA VA + ]2+ ]2+ ]2+ ]2 + 3 ; byte 3
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
|
||
|
LDAW MAC ; INPUT32 := the 4 bytes at W00+X; X is a byte offset (EXTENDWORDS passes word index * 4); clobbers A
|
||
|
|
||
|
LDA W00 + 3,X ; byte 3 of the selected word
|
||
|
STA INPUT32+3 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA W00 + 2,X ; byte 2
|
||
|
STA INPUT32+2 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA W00 + 1,X ; byte 1
|
||
|
STA INPUT32+1 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA W00,X ; byte 0
|
||
|
STA INPUT32 ; store in 32 bit "accumulator"
|
||
|
|
||
|
<<<
|
||
|
|
||
|
|
||
|
* LDWSTA32 - store the 32-bit accumulator INPUT32 into one word of the W table.
*   In:       A = word index (STOREW in EXTENDWORDS passes i = 16..63)
*   Out:      W00+4*A .. W00+4*A+3 := INPUT32 .. INPUT32+3
*   Clobbers: A, X, flags
* The index is scaled with ASL/ASL. The older CLC/ROL/ROL form gave the same
* result for every index below 64 but cost an extra instruction and would have
* rotated bit 7 back into bit 0 for larger values.
LDWSTA32    MAC

            ASL                     ; A*2
            ASL                     ; A*4 = byte offset into W
            TAX

            LDA   INPUT32+3         ; load from 32 bit "accumulator"
            STA   W00+3,X           ; byte 3 of w[index]

            LDA   INPUT32+2
            STA   W00+2,X           ; byte 2

            LDA   INPUT32+1
            STA   W00+1,X           ; byte 1

            LDA   INPUT32
            STA   W00,X             ; byte 0

            <<<
|
||
|
|
||
|
|
||
|
* STA32 - store the 32-bit accumulator INPUT32 through the pointer at $00/$01.
*   Out:      ($00)+3 .. ($00)+0 := INPUT32+3 .. INPUT32+0 (byte 3 first)
*   Clobbers: A, Y (Y is 0 on exit)
STA32       MAC

            LDY   #$03
            LDA   INPUT32+3         ; load from 32 bit "accumulator"
            STA   ($00),Y           ; store through the pointer

            DEY                     ; Y = 2
            LDA   INPUT32+2
            STA   ($00),Y

            DEY                     ; Y = 1
            LDA   INPUT32+1
            STA   ($00),Y

            DEY                     ; Y = 0
            LDA   INPUT32
            STA   ($00),Y

            <<<                     ; End of Macro
|
||
|
|
||
|
|
||
|
|
||
|
STAS1 MAC ; puts 4 bytes from 32 bit "accumulator" INPUT32 into S1; clobbers A
|
||
|
|
||
|
LDA INPUT32+3 ; load from 32 bit "accumulator"
|
||
|
STA S1+3 ; byte 3 (zero page, no pointer involved)
|
||
|
|
||
|
LDA INPUT32+2 ; load from 32 bit "accumulator"
|
||
|
STA S1+2 ; byte 2
|
||
|
|
||
|
LDA INPUT32+1 ; load from 32 bit "accumulator"
|
||
|
STA S1+1 ; byte 1
|
||
|
|
||
|
LDA INPUT32 ; load from 32 bit "accumulator"
|
||
|
STA S1 ; byte 0
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
STAS0 MAC ; puts 4 bytes from 32 bit "accumulator" INPUT32 into S0; clobbers A
|
||
|
|
||
|
LDA INPUT32+3 ; load from 32 bit "accumulator"
|
||
|
STA S0+3 ; byte 3 (zero page, no pointer involved)
|
||
|
|
||
|
LDA INPUT32+2 ; load from 32 bit "accumulator"
|
||
|
STA S0+2 ; byte 2
|
||
|
|
||
|
LDA INPUT32+1 ; load from 32 bit "accumulator"
|
||
|
STA S0+1 ; byte 1
|
||
|
|
||
|
LDA INPUT32 ; load from 32 bit "accumulator"
|
||
|
STA S0 ; byte 0
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
STATEMP1 MAC ; puts 4 bytes from 32 bit "accumulator" INPUT32 into TEMP1; clobbers A
|
||
|
|
||
|
LDA INPUT32+3 ; load from 32 bit "accumulator"
|
||
|
STA TEMP1+3 ; byte 3 (zero page, no pointer involved)
|
||
|
|
||
|
LDA INPUT32+2 ; load from 32 bit "accumulator"
|
||
|
STA TEMP1+2 ; byte 2
|
||
|
|
||
|
LDA INPUT32+1 ; load from 32 bit "accumulator"
|
||
|
STA TEMP1+1 ; byte 1
|
||
|
|
||
|
LDA INPUT32 ; load from 32 bit "accumulator"
|
||
|
STA TEMP1 ; byte 0
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
|
||
|
STATEMP0 MAC ; puts 4 bytes from 32 bit "accumulator" INPUT32 into TEMP0; clobbers A
|
||
|
|
||
|
LDA INPUT32+3 ; load from 32 bit "accumulator"
|
||
|
STA TEMP0+3 ; byte 3 (zero page, no pointer involved)
|
||
|
|
||
|
LDA INPUT32+2 ; load from 32 bit "accumulator"
|
||
|
STA TEMP0+2 ; byte 2
|
||
|
|
||
|
LDA INPUT32+1 ; load from 32 bit "accumulator"
|
||
|
STA TEMP0+1 ; byte 1
|
||
|
|
||
|
LDA INPUT32 ; load from 32 bit "accumulator"
|
||
|
STA TEMP0 ; byte 0
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
|
||
|
|
||
|
|
||
|
LDATEMP0 MAC ; puts the 4 bytes at TEMP0 into 32 bit "accumulator" INPUT32; clobbers A only (no pointer, Y untouched)
|
||
|
|
||
|
LDA TEMP0+3 ; byte 3 of TEMP0
|
||
|
STA INPUT32+3 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA TEMP0+2 ; byte 2
|
||
|
STA INPUT32+2 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA TEMP0+1 ; byte 1
|
||
|
STA INPUT32+1 ; store in 32 bit "accumulator"
|
||
|
|
||
|
LDA TEMP0 ; byte 0
|
||
|
STA INPUT32 ; store in 32 bit "accumulator"
|
||
|
|
||
|
<<< ; End of Macro
|
||
|
;/LDATEMP0
|
||
|
|
||
|
|
||
|
* LDA32 - load the 32-bit accumulator INPUT32 through the pointer at $00/$01.
*   Out:      INPUT32+3 .. INPUT32+0 := ($00)+3 .. ($00)+0 (byte 3 first)
*   Clobbers: A, Y (Y is 0 on exit)
LDA32       MAC
            LDY   #$03
            LDA   ($00),Y           ; load through the pointer
            STA   INPUT32+3         ; store in 32 bit "accumulator"

            DEY                     ; Y = 2
            LDA   ($00),Y
            STA   INPUT32+2

            DEY                     ; Y = 1
            LDA   ($00),Y
            STA   INPUT32+1

            DEY                     ; Y = 0
            LDA   ($00),Y
            STA   INPUT32

            <<<                     ; End of Macro
|
||
|
;/LDA32
|
||
|
LDX32 MAC ; puts 4 bytes from ($01,$00) into 32 bit "X register" XREGISTER32
|
||
|
LDY #$03
|
||
|
LDA ($0),Y ; load from table pointer
|
||
|
STA XREGISTER32+3 ; store in 32 bit "X register"
|
||
|
|
||
|
LDY #$02
|
||
|
LDA ($0),Y ; load from table pointer
|
||
|
STA XREGISTER32+2 ; store in 32 bit "X register"
|
||
|
|
||
|
LDY #$01
|
||
|
LDA ($0),Y ; load from table pointer
|
||
|
STA XREGIS |