Use faster assembly routine for 320-mode raw pixel decoding too.

The assembly code for the 640-mode and 320-mode routines is moved to a macro so it can be shared between them.
This commit is contained in:
Stephen Heumann 2016-05-20 22:08:05 -05:00
parent 9e401f4a83
commit aa38f6d209
3 changed files with 127 additions and 127 deletions

37
raw.cc
View File

@ -49,6 +49,9 @@ static unsigned char *destPtr;
/* Assembly-language raw pixel decoders, one per graphics mode.
 * startOffset and endOffset are offsets relative to destBuf: RawDraw calls
 * rawDecode320 with destPtr-destBuf and finalDestPtr-destBuf.  The returned
 * pointer is stored back into lineDataPtr by that caller, so it is the
 * input position reached after decoding. */
unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
unsigned char *lineDataPtr);
unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
unsigned char *lineDataPtr);
/* Ends drawing of a raw rectangle when it is complete or aborted
* because the rectangle is not visible.
@ -136,38 +139,8 @@ void RawDraw (void) {
destPtr++;
}
else { /* 320 mode */
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */
*(unsigned*)destPtr =
outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
*(unsigned*)(destPtr+1) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+2) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+3) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+4) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+5) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+6) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(destPtr+7) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
*(unsigned*)(bigcoltab320 + inPixelsB);
destPtr += 8;
lineDataPtr += 16;
}
while (destPtr < finalDestPtr) {
*(destPtr++) =
*(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr);
lineDataPtr += 2;
}
lineDataPtr = rawDecode320(destPtr-destBuf, finalDestPtr-destBuf, lineDataPtr);
destPtr = finalDestPtr;
/* Final byte to produce */
*destPtr = pixTransTbl[*(lineDataPtr++)] & 0xF0;
if (extraByteAdvance)

View File

@ -24,95 +24,10 @@ oldDP gequ 8 direct page on entry
* unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
* unsigned char *lineDataPtr);
* The routine body is generated by the rawDec macro, which is shared with
* rawDecode320.  The first argument (640) selects the loopBody640 pixel
* step and a 4-byte input stride per destination byte; the second (8) is
* the loop unrolling factor.  Keeping a hand-written copy of the body next
* to the macro call would define every label twice in this segment.
rawDecode640 start rawDec640
rawdec 640,8
end
* Same for 320-mode:
* unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
* unsigned char *lineDataPtr);
* The body comes from the rawDec macro: 320 selects the loopBody320 pixel
* step (2 input bytes per destination byte) and 8 is the unrolling factor.
rawDecode320 start rawDec320
rawdec 320,8
end

View File

@ -1,13 +1,125 @@
macro
loopBody320 &iters
lcla &n
* Emits iters copies of the 320-mode pixel step.  Copy n (counting from 0)
* reads the input word at Y+2*n, uses it as an index into BCT320, and
* stores the table entry at destBuf+destOfst+n.  The accumulator width is
* whatever the invoking code has selected; this macro does not change it.
.top320
ldx |2*&n,Y input word n, used as the table index
lda >BCT320,X look up the output value in BCT320
ldx destOfst x = current destination offset
sta >destBuf+&n,X store at destBuf + destOfst + n
&n seta &n+1
aif &n<&iters,.top320
mend
macro
loopBody640 &iters
lcla &n
* Emits iters copies of the 640-mode pixel step.  Copy n (counting from 0)
* reads two input words at Y+4*n and Y+4*n+2, looks the first up in
* BCT640A and the second in BCT640B, ORs the two table entries together,
* and stores the result once at destBuf+destOfst+n.
.top640
ldx |4*&n,Y first input word, index into BCT640A
lda >BCT640A,X
ldx |4*&n+2,Y second input word, index into BCT640B
ora >BCT640B,X combine both table entries
ldx destOfst x = current destination offset
sta >destBuf+&n,X store at destBuf + destOfst + n
&n seta &n+1
aif &n<&iters,.top640
mend
macro
rawDec &mode,&unroll
lclc &loopBdy
lcla &bytePix
&loopBdy setc 'loopBody'+&mode
&bytePix seta &mode/160
* Body of a raw pixel decoding routine, shared by rawDecode640 (mode 640)
* and rawDecode320 (mode 320).  loopBdy names the per-pixel step macro
* (loopBody640 or loopBody320), unroll is the number of steps in the
* unrolled main loop, and bytePix = mode/160 is how far Y advances through
* the input for each destination offset produced.
* On entry the stack holds startOffset, endOffset and lineDataPtr above the
* 3-byte return address; the parameters are removed before returning.
* Returns the advanced lineDataPtr: low 16 bits in A, bank byte in X.
tdc
tax
lda |dpPtr
tcd set new direct page
stx oldDP save direct page on entry
phb
phb
pla
sta oldDB save data bank on entry (same byte in both halves)
lda 10,S
pha leaves extra byte: cleaned up by plx at label end
plb initialize data bank=bank of lineDataPtr
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
tay
pla move return address to proper position
sta 8-1,S
pla
sta 10-3,S
plx
stx destOfst initialize x = destOfst = startOffset
pla
sta endOfst initialize endOfst = endOffset
sec
sbc #&unroll-1
bcs doLoop1 if endOffset-(unroll-1) did not underflow...
jmp test2 otherwise too short for the unrolled loop
doLoop1 sta loop1End initialize loop1End = endOffset - (unroll-1)
txa a = startOffset
jmp test1
loop1 anop
sep #$20 8-bit accumulator for the unrolled steps
longa off
&loopBdy &unroll
rep #$20 back to 16-bit accumulator
longa on
tya
adc #&unroll*&bytePix carry must be clear (it is: test1 fell through bge)
tay
bcs incDB1 input pointer crossed a bank boundary
cont1 txa
adc #&unroll carry must be clear (no carry above, or clc in incDB)
sta destOfst
test1 cmp loop1End
bge check2
jmp loop1
check2 cmp endOfst
bge end
loop2 anop one step at a time for the remaining offsets
&loopBdy 1
inx
stx destOfst
tya
adc #&bytePix carry must be clear (bge/blt left it clear, or clc in incDB)
tay
bcs incDB2 input pointer crossed a bank boundary
test2 cpx endOfst
blt loop2
end phb
plx x = current DB (bank byte of lineDataPtr) + the extra byte
pei (oldDB)
plb restore data bank
plb
lda oldDP
tcd restore direct page
tya a = lineDataPtr (low 16 bits)
rtl
incDB1 pea cont1-1 resume address for the rts below
bra incDB
incDB2 pea test2-1 resume address for the rts below
incDB phb advance the data bank by one
pla pulls DB plus the low byte of the resume address
inc A
pha
plb
clc leave carry clear for the adc that follows
rts
mend