From aa38f6d209a2a9a1b9b16e09d5df5e81e42e90af Mon Sep 17 00:00:00 2001
From: Stephen Heumann
Date: Fri, 20 May 2016 22:08:05 -0500
Subject: [PATCH] Use faster assembly routine for 320-mode raw pixel decoding too.

The assembly code for the 640-mode and 320-mode routines is moved to a
macro so it can be shared between them.
---
 raw.cc        |  37 +++-------------
 rawdec.asm    |  97 +++-------------------------------------
 rawdec.macros | 120 ++++++++++++++++++++++++++++++++++++++++++++++++--
 3 files changed, 127 insertions(+), 127 deletions(-)

diff --git a/raw.cc b/raw.cc
index 0d65bdf..5203146 100644
--- a/raw.cc
+++ b/raw.cc
@@ -49,6 +49,9 @@ static unsigned char *destPtr;
 unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
                              unsigned char *lineDataPtr);
 
+unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
+                             unsigned char *lineDataPtr);
+
 
 /* Ends drawing of a raw rectangle when it is complete or aborted
  * because the rectangle is not visible.
@@ -136,38 +139,8 @@ void RawDraw (void) {
             destPtr++;
         }
         else {  /* 320 mode */
-            while (destPtr + 7 < finalDestPtr) {    /* Unrolled loop */
-                *(unsigned*)destPtr =
-                    outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
-                *(unsigned*)(destPtr+1) =
-                    (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
-                *(unsigned*)(destPtr+2) =
-                    (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
-                *(unsigned*)(destPtr+3) =
-                    (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
-                *(unsigned*)(destPtr+4) =
-                    (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
-                *(unsigned*)(destPtr+5) =
-                    (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
-                *(unsigned*)(destPtr+6) =
-                    (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
-                        (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
-                *(destPtr+7) =
-                    (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
-                        *(unsigned*)(bigcoltab320 + inPixelsB);
-                destPtr += 8;
-                lineDataPtr += 16;
-            }
-            while (destPtr < finalDestPtr) {
-                *(destPtr++) =
-                    *(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr);
-                lineDataPtr += 2;
-            }
+            lineDataPtr = rawDecode320(destPtr-destBuf, finalDestPtr-destBuf, lineDataPtr);
+            destPtr = finalDestPtr;
             /* Final byte to produce */
             *destPtr = pixTransTbl[*(lineDataPtr++)] & 0xF0;
             if (extraByteAdvance)
diff --git a/rawdec.asm b/rawdec.asm
index 87eae4f..2605afa 100644
--- a/rawdec.asm
+++ b/rawdec.asm
@@ -24,95 +24,10 @@ oldDP    gequ  8                   direct page on entry
 * unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
 *                              unsigned char *lineDataPtr);
 rawDecode640 start rawDec640
-unroll   equ   8                   loop unrolling factor
-
-         tdc
-         tax
-         lda   |dpPtr
-         tcd                       set new direct page
-         stx   oldDP               save direct page on entry
-
-         phb
-         phb
-         pla
-         sta   oldDB               save data bank on entry
-
-         lda   10,S
-         pha                       leaves extra byte: clean up later
-         plb                       initialize data bank=bank of lineDataPtr
-         lda   8+1,S               initialize y = lineDataPtr (low 16 bits)
-         tay
-
-         pla                       move return address to proper position
-         sta   8-1,S
-         pla
-         sta   10-3,S
-
-         plx
-         stx   destOfst            initialize x = destOfst = startOffset
-
-         pla
-         sta   endOfst             initialize endOfst = endOffset
-
-         sec
-         sbc   #unroll-1
-         bcs   doLoop1             if endOffset-7 did not underflow...
-         jmp   test2
-
-doLoop1  sta   loop1End            initialize loop1End = endOffset - 7
-         txa                       a = startOffset
-         jmp   test1
-
-loop1    sep   #$20
-         longa off
-         loopBody640 unroll
-         rep   #$20
-         longa on
-         tya
-         clc
-         adc   #unroll*4           carry must be clear
-         tay
-         bcs   incDB1
-cont1    txa
-         clc
-         adc   #unroll             carry must be clear
-         sta   destOfst
-test1    cmp   loop1End
-         bge   check2
-         jmp   loop1
-
-check2   cmp   endOfst
-         bge   end
-loop2    anop
-         loopBody640 1
-         inx
-         stx   destOfst
-         tya
-         clc
-         adc   #4                  carry must be clear
-         tay
-         bcs   incDB2
-test2    cpx   endOfst
-         blt   loop2
-
-end      phb
-         plx                       x = old DB (high byte of lineDataPtr)
-         pei   (oldDB)
-         plb                       restore data bank
-         plb
-         lda   oldDP
-         tcd                       restore direct page
-         tya                       a = lineDataPtr (low 16 bits)
-         rtl
-
-incDB1   pea   cont1-1
-         bra   incDB
-incDB2   pea   test2-1
-incDB    phb
-         pla
-         inc   A
-         pha
-         plb
-         clc
-         rts
+         rawdec 640,8
+         end
+
+* Same for 320-mode
+rawDecode320 start rawDec320
+         rawdec 320,8
          end
diff --git a/rawdec.macros b/rawdec.macros
index cbea125..488de23 100644
--- a/rawdec.macros
+++ b/rawdec.macros
@@ -1,13 +1,125 @@
+         macro
+         loopBody320 &iters
+         lcla  &n
+.top320
+         ldx   |2*&n,Y
+         lda   >BCT320,X
+         ldx   destOfst
+         sta   >destBuf+&n,X
+&n       seta  &n+1
+         aif   &n<&iters,.top320
+         mend
+
+
          macro
          loopBody640 &iters
          lcla  &n
-.top
+.top640
          ldx   |4*&n,Y
          lda   >BCT640A,X
          ldx   |4*&n+2,Y
          ora   >BCT640B,X
-         LDX   destOfst
-         STA   >destBuf+&n,X
+         ldx   destOfst
+         sta   >destBuf+&n,X
 &n       seta  &n+1
-         aif   &n<&iters,.top
+         aif   &n<&iters,.top640
+
+         mend
+
+
+         macro
+         rawDec &mode,&unroll
+         lclc  &loopBdy
+         lcla  &bytePix
+&loopBdy setc  'loopBody'+&mode
+&bytePix seta  &mode/160
+
+         tdc
+         tax
+         lda   |dpPtr
+         tcd                       set new direct page
+         stx   oldDP               save direct page on entry
+
+         phb
+         phb
+         pla
+         sta   oldDB               save data bank on entry
+
+         lda   10,S
+         pha                       leaves extra byte: clean up later
+         plb                       initialize data bank=bank of lineDataPtr
+         lda   8+1,S               initialize y = lineDataPtr (low 16 bits)
+         tay
+
+         pla                       move return address to proper position
+         sta   8-1,S
+         pla
+         sta   10-3,S
+
+         plx
+         stx   destOfst            initialize x = destOfst = startOffset
+
+         pla
+         sta   endOfst             initialize endOfst = endOffset
+
+         sec
+         sbc   #&unroll-1
+         bcs   doLoop1             if endOffset-7 did not underflow...
+         jmp   test2
+
+doLoop1  sta   loop1End            initialize loop1End = endOffset - 7
+         txa                       a = startOffset
+         jmp   test1
+
+loop1    anop
+         sep   #$20
+         longa off
+         &loopBdy &unroll
+         rep   #$20
+         longa on
+         tya
+         adc   #&unroll*&bytePix   carry must be clear
+         tay
+         bcs   incDB1
+cont1    txa
+         adc   #&unroll            carry must be clear
+         sta   destOfst
+test1    cmp   loop1End
+         bge   check2
+         jmp   loop1
+
+check2   cmp   endOfst
+         bge   end
+loop2    anop
+         &loopBdy 1
+         inx
+         stx   destOfst
+         tya
+         adc   #&bytePix           carry must be clear
+         tay
+         bcs   incDB2
+test2    cpx   endOfst
+         blt   loop2
+
+end      phb
+         plx                       x = old DB (high byte of lineDataPtr)
+         pei   (oldDB)
+         plb                       restore data bank
+         plb
+         lda   oldDP
+         tcd                       restore direct page
+         tya                       a = lineDataPtr (low 16 bits)
+         rtl
+
+incDB1   pea   cont1-1
+         bra   incDB
+incDB2   pea   test2-1
+incDB    phb
+         pla
+         inc   A
+         pha
+         plb
+         clc
+         rts
+
          mend