* Copyright (c) 2017 Stephen Heumann
*
* Permission to use, copy, modify, and/or distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.

* General notes for the rotate macros below:
*  - The code treats a 32-bit value as two 16-bit words (low word at &loc,
*    high word at &loc+2); it presumably requires 16-bit accumulator and
*    index registers -- confirm against the including source file.
*  - Each rotate step is one accumulator shift (only to obtain the carry)
*    followed by two rotate-through-carry instructions on memory.
*  - A rotate count of zero emits no rotate instructions.
*  - A and the carry flag are clobbered.

* Right-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions.
* Counts above 16 are performed as a left rotate by 32-&n instead.
        macro
        ROTR4 &loc,&n
        aif   &n>16,.dorotl
        aif   &n>0,.go
        mexit
.go
        lda   &loc+2
        lcla  &i
&i      seta  &n
.rotrloop
        lsr   a               ;carry := current bit 0 of the high word
        ror   &loc
        ror   &loc+2
&i      seta  &i-1
        aif   &i>0,.rotrloop
        ago   .end
.dorotl
        ROTL4 &loc,32-&n
.end
        mend

* Same as ROTR4, but skips the initial load.
* Expects the contents of &loc+2 to be loaded in A already
        macro
        ROTR4CONT &loc,&n
        aif   &n>16,.dorotl
        aif   &n>0,.go
        mexit
.go
        lcla  &i
&i      seta  &n
.rotrloop
        lsr   a               ;carry := current bit 0 of the high word
        ror   &loc
        ror   &loc+2
&i      seta  &i-1
        aif   &i>0,.rotrloop
        ago   .end
.dorotl
        ROTL4 &loc,32-&n
.end
        mend

* Left-rotate 32-bit value in &loc (DP or 16-bit address) by &n positions.
* Counts above 16 are performed as a right rotate by 32-&n instead.
        macro
        ROTL4 &loc,&n
        aif   &n>16,.dorotr2
        aif   &n>0,.go2
        mexit
.go2
        lda   &loc
        lcla  &i
&i      seta  &n
.rotlloop2
        asl   a               ;carry := current bit 15 of the low word
        rol   &loc+2
        rol   &loc
&i      seta  &i-1
        aif   &i>0,.rotlloop2
        ago   .end2
.dorotr2
        ROTR4 &loc,32-&n
.end2
        mend

* Same as ROTL4, but skips the initial load.
* Expects the contents of &loc to be loaded in A already
        macro
        ROTL4CONT &loc,&n
        aif   &n>16,.dorotr2
        aif   &n>0,.go2
        mexit
.go2
        lcla  &i
&i      seta  &n
.rotlloop2
        asl   a               ;carry := current bit 15 of the low word
        rol   &loc+2
        rol   &loc
&i      seta  &i-1
        aif   &i>0,.rotlloop2
        ago   .end2
.dorotr2
        ROTR4 &loc,32-&n
.end2
        mend

* &to := &from ROTR4 &n
* Copies the 32-bit value and rotates the copy; &from is only read.
        macro
        ROTR4MOVE &to,&from,&n
        aif   &n>16,.dorotl3
        lda   &from
        sta   &to
        lda   &from+2
        sta   &to+2
        aif   &n>0,.go3
        mexit
.go3
        lcla  &i
&i      seta  &n
.rotrloop3
        lsr   a               ;carry := current bit 0 of the high word
        ror   &to
        ror   &to+2
&i      seta  &i-1
        aif   &i>0,.rotrloop3
        ago   .end3
.dorotl3
        ROTL4MOVE &to,&from,32-&n
.end3
        mend

* &to := &from ROTL4 &n
* Copies the 32-bit value and rotates the copy; &from is only read.
        macro
        ROTL4MOVE &to,&from,&n
        aif   &n>16,.dorotr4
        lda   &from+2
        sta   &to+2
        lda   &from
        sta   &to
        aif   &n>0,.go4
        mexit
.go4
        lcla  &i
&i      seta  &n
.rotlloop4
        asl   a               ;carry := current bit 15 of the low word
        rol   &to+2
        rol   &to
&i      seta  &i-1
        aif   &i>0,.rotlloop4
        ago   .end4
.dorotr4
        ROTR4MOVE &to,&from,32-&n
.end4
        mend

* Left-rotate with various optimizations applied
*
* Rotates the 32-bit value at &loc left by &n (intended range 0..24) using
* the cheapest of these strategies:
*    0..4   plain ROTL4
*    5..7   move bytes to rotate left by 8, then rotate right by 8-&n
*    8..11  move bytes to rotate left by 8, then rotate left by &n-8
*   12..15  swap the two words (rotate by 16), then rotate right by 16-&n
*   16      swap the two words
*   17..20  swap the two words, then rotate left by &n-16
*   21..24  move bytes to rotate left by 24, then rotate right by 24-&n
*
* The byte-moving cases read 16-bit words at &loc+3 and &loc-1 and OR them
* together, so the bytes at &loc-1 and &loc+4 must be zero.
*
* &haveLocPlus2 is optional.  If it is supplied, the word-swapping cases
* (12..20) assume A already holds the word at &loc+2 and do not reload it.
*
* Clobbers A and carry, plus X (byte-moving cases) or Y (word swaps).
        macro
        ROTL4AUTO &loc,&n,&haveLocPlus2
        aif   &n>4,.skip1
        ROTL4 &loc,&n
        mexit
.skip1
        aif   &n>7,.skip2
        ldx   &loc+1
        lda   &loc+3
        ora   &loc-1
        sta   &loc
        stx   &loc+2
        txa                   ;the right rotate wants the high word in A
        ROTR4CONT &loc,8-&n
        mexit
.skip2
        aif   &n>11,.skip3
        ldx   &loc+1
        lda   &loc+3
        ora   &loc-1
        sta   &loc
        stx   &loc+2
        ROTL4CONT &loc,&n-8
        mexit
.skip3
        aif   &n>15,.skip4
        aif   C:&haveLocPlus2>0,.noload1
        ldy   &loc+2
        ago   .didload1
.noload1
        tay
.didload1
        lda   &loc
        sta   &loc+2
        sty   &loc
        ROTR4CONT &loc,16-&n
        mexit
.skip4
        aif   &n>16,.skip5
        aif   C:&haveLocPlus2>0,.noload2
        lda   &loc+2
.noload2
        ldy   &loc
        sta   &loc
        sty   &loc+2
        mexit
.skip5
        aif   &n>20,.skip6
        aif   C:&haveLocPlus2>0,.noload3
        lda   &loc+2
.noload3
        ldy   &loc
        sta   &loc
        sty   &loc+2
        ROTL4CONT &loc,&n-16
        mexit
.skip6
        ldx   &loc+1
        lda   &loc+3
        ora   &loc-1
        sta   &loc+2
        stx   &loc
        ROTR4CONT &loc,24-&n
        mend

* This makes a function wrapper that is callable from C,
* taking a pointer to the context structure as its argument.
*
* The wrapper assumes that on entry the stack holds a 3-byte return address
* on top of a 4-byte pointer argument (presumably the ORCA/C convention --
* confirm).  It removes the argument, sets the direct page register to the
* low 16 bits of the pointer (the upper 16 bits are discarded), calls &fn
* with JSL, then restores the caller's direct page and returns with RTL.
* The data bank register is preserved.  A, X and Y are used as scratch
* before the call.
        macro
        CFunction &fn
        phb                   ;save data bank on top of the return address
        plx                   ;X := saved bank + return address low byte
        ply                   ;Y := remaining two return address bytes
        tdc                   ;A := direct page of the caller
        pld                   ;direct page := low word of the argument
        plb                   ;discard the upper word of the argument
        plb
        phy                   ;put the return address back
        phx
        plb                   ;restore data bank
        pha                   ;save direct page of the caller
        jsl   &fn
        pld                   ;restore direct page of the caller
        rtl
        mend

* One iteration of the loop for processing blocks.
* The a,b,c,d variables are given as parameters so we can avoid cycling them.
* shift is a per-round shift amount.
* BlockLoopIter &a,&b,&c,&d,&shift
*   Computes  &a := &b + rotl32(&a + f(&b,&c,&d) + m[g] + k[i], &shift)
*   where f is selected at assembly time by &part (1..4).  &part is not a
*   parameter of this macro; it is presumably inherited from the enclosing
*   BlockLoopPart expansion.
*   Uses the external symbols temp, idx, m, k and g_times_4, advances idx
*   by 4, and clobbers A, X, Y and carry.  temp is rotated with ROTL4AUTO,
*   whose byte-moving cases need zero bytes just before and after temp.
        macro
        BlockLoopIter &a,&b,&c,&d,&shift
* Part 1 (f_0 to f_15):  temp := a + (d xor (b and (c xor d)))
        aif   &part<>1,.tryg
        lda   &c
        eor   &d
        and   &b
        eor   &d
        clc
        adc   &a
        sta   temp
        lda   &c+2
        eor   &d+2
        and   &b+2
        eor   &d+2
        adc   &a+2
        sta   temp+2
        ago   .mix
.tryg
* Part 2 (f_16 to f_31):  temp := a + (c xor (d and (b xor c)))
        aif   &part<>2,.tryh
        lda   &b
        eor   &c
        and   &d
        eor   &c
        clc
        adc   &a
        sta   temp
        lda   &b+2
        eor   &c+2
        and   &d+2
        eor   &c+2
        adc   &a+2
        sta   temp+2
        ago   .mix
.tryh
* Part 3 (f_32 to f_47):  temp := a + (b xor c xor d)
        aif   &part<>3,.tryi
        lda   &b
        eor   &c
        eor   &d
        clc
        adc   &a
        sta   temp
        lda   &b+2
        eor   &c+2
        eor   &d+2
        adc   &a+2
        sta   temp+2
        ago   .mix
.tryi
* Part 4 (f_48 to f_63):  temp := a + (c xor (b or not d))
        aif   &part<>4,.mix
        lda   &d
        eor   #$FFFF
        ora   &b
        eor   &c
        clc
        adc   &a
        sta   temp
        lda   &d+2
        eor   #$FFFF
        ora   &b+2
        eor   &c+2
        adc   &a+2
        sta   temp+2
.mix
* temp := temp + m[g_times_4[idx]] + k[idx], as two 16-bit halves each.
* The index increments sit between the low and high adds; they leave the
* carry untouched.
        ldy   idx
        ldx   g_times_4,y
        lda   m,x
        clc
        adc   temp
        sta   temp
        inx
        inx
        lda   m,x
        adc   temp+2
        tax                   ;park the high word while k is added
        lda   k,y
        clc
        adc   temp
        sta   temp
        iny
        iny
        txa
        adc   k,y
        sta   temp+2          ;A still holds temp+2 for the rotate below
        iny
        iny
        sty   idx
        ROTL4AUTO temp,&shift,1
* a := b + temp
        clc
        lda   &b
        adc   temp
        sta   &a
        lda   &b+2
        adc   temp+2
        sta   &a+2
        mend

* One part of the loop for processing blocks (16 iterations)
* Each pass through the emitted loop runs four iterations with the roles of
* a_, b, c, d cycled; the loop repeats until idx reaches 16*4 times the
* part number.
        macro
        BlockLoopPart &part
loop&part anop
        aif   &part<>1,.rnd2
        BlockLoopIter a_,b,c,d,7
        BlockLoopIter d,a_,b,c,12
        BlockLoopIter c,d,a_,b,17
        BlockLoopIter b,c,d,a_,22
        ago   .looptest
.rnd2
        aif   &part<>2,.rnd3
        BlockLoopIter a_,b,c,d,5
        BlockLoopIter d,a_,b,c,9
        BlockLoopIter c,d,a_,b,14
        BlockLoopIter b,c,d,a_,20
        ago   .looptest
.rnd3
        aif   &part<>3,.rnd4
        BlockLoopIter a_,b,c,d,4
        BlockLoopIter d,a_,b,c,11
        BlockLoopIter c,d,a_,b,16
        BlockLoopIter b,c,d,a_,23
        ago   .looptest
.rnd4
        aif   &part<>4,.looptest
        BlockLoopIter a_,b,c,d,6
        BlockLoopIter d,a_,b,c,10
        BlockLoopIter c,d,a_,b,15
        BlockLoopIter b,c,d,a_,21
.looptest
* Branch around a jmp so the backward jump is not range limited.
        lda   idx
        cmp   #16*4*&part
        bge   endloop&part
        jmp   loop&part
endloop&part anop
        mend