Use faster assembly routine for 320-mode raw pixel decoding too.

The assembly code for the 640-mode and 320-mode routines is moved to a macro so it can be shared between them.
This commit is contained in:
Stephen Heumann 2016-05-20 22:08:05 -05:00
parent 9e401f4a83
commit aa38f6d209
3 changed files with 127 additions and 127 deletions

37
raw.cc
View File

@ -49,6 +49,9 @@ static unsigned char *destPtr;
unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset, unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
unsigned char *lineDataPtr); unsigned char *lineDataPtr);
unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
unsigned char *lineDataPtr);
/* Ends drawing of a raw rectangle when it is complete or aborted /* Ends drawing of a raw rectangle when it is complete or aborted
* because the rectangle is not visible. * because the rectangle is not visible.
@ -136,38 +139,8 @@ void RawDraw (void) {
destPtr++; destPtr++;
} }
else { /* 320 mode */ else { /* 320 mode */
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */ lineDataPtr = rawDecode320(destPtr-destBuf, finalDestPtr-destBuf, lineDataPtr);
*(unsigned*)destPtr = destPtr = finalDestPtr;
outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
*(unsigned*)(destPtr+1) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+2) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+3) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+4) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+5) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+6) =
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(destPtr+7) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
*(unsigned*)(bigcoltab320 + inPixelsB);
destPtr += 8;
lineDataPtr += 16;
}
while (destPtr < finalDestPtr) {
*(destPtr++) =
*(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr);
lineDataPtr += 2;
}
/* Final byte to produce */ /* Final byte to produce */
*destPtr = pixTransTbl[*(lineDataPtr++)] & 0xF0; *destPtr = pixTransTbl[*(lineDataPtr++)] & 0xF0;
if (extraByteAdvance) if (extraByteAdvance)

View File

@ -24,95 +24,10 @@ oldDP gequ 8 direct page on entry
* unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset, * unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
* unsigned char *lineDataPtr); * unsigned char *lineDataPtr);
rawDecode640 start rawDec640 rawDecode640 start rawDec640
unroll equ 8 loop unrolling factor rawdec 640,8
end
tdc
tax * Same for 320-mode
lda |dpPtr rawDecode320 start rawDec320
tcd set new direct page rawdec 320,8
stx oldDP save direct page on entry
phb
phb
pla
sta oldDB save data bank on entry
lda 10,S
pha leaves extra byte: clean up later
plb initialize data bank=bank of lineDataPtr
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
tay
pla move return address to proper position
sta 8-1,S
pla
sta 10-3,S
plx
stx destOfst initialize x = destOfst = startOffset
pla
sta endOfst initialize endOfst = endOffset
sec
sbc #unroll-1
bcs doLoop1 if endOffset-7 did not underflow...
jmp test2
doLoop1 sta loop1End initialize loop1End = endOffset - 7
txa a = startOffset
jmp test1
loop1 sep #$20
longa off
loopBody640 unroll
rep #$20
longa on
tya
clc
adc #unroll*4 carry must be clear
tay
bcs incDB1
cont1 txa
clc
adc #unroll carry must be clear
sta destOfst
test1 cmp loop1End
bge check2
jmp loop1
check2 cmp endOfst
bge end
loop2 anop
loopBody640 1
inx
stx destOfst
tya
clc
adc #4 carry must be clear
tay
bcs incDB2
test2 cpx endOfst
blt loop2
end phb
plx x = old DB (high byte of lineDataPtr)
pei (oldDB)
plb restore data bank
plb
lda oldDP
tcd restore direct page
tya a = lineDataPtr (low 16 bits)
rtl
incDB1 pea cont1-1
bra incDB
incDB2 pea test2-1
incDB phb
pla
inc A
pha
plb
clc
rts
end end

View File

@ -1,13 +1,125 @@
macro
loopBody320 &iters
lcla &n
* Emits the 320-mode decode step iters times in a row: the aif at the bottom
* branches back to .top320 while the counter n is still below iters.
* Each emitted step, for the current value of n:
*   - loads X from the word at offset 2*n from Y (absolute,Y addressing, so
*     the read goes through the current data bank),
*   - loads A from the BCT320 table at that index (long addressing),
*   - reloads X from destOfst and stores A at destBuf+n+destOfst.
* Only ldx/lda/sta are emitted, and none of them changes the carry flag.
* NOTE(review): the accumulator width is whatever the expansion site has
* selected; rawDec expands this macro once after sep #$20 and once after
* rep #$20 - confirm the wider store in the second case is intended.
.top320
ldx |2*&n,Y
lda >BCT320,X
ldx destOfst
sta >destBuf+&n,X
&n seta &n+1
aif &n<&iters,.top320
mend
macro macro
loopBody640 &iters loopBody640 &iters
lcla &n lcla &n
.top .top640
ldx |4*&n,Y ldx |4*&n,Y
lda >BCT640A,X lda >BCT640A,X
ldx |4*&n+2,Y ldx |4*&n+2,Y
ora >BCT640B,X ora >BCT640B,X
LDX destOfst ldx destOfst
STA >destBuf+&n,X sta >destBuf+&n,X
&n seta &n+1 &n seta &n+1
aif &n<&iters,.top aif &n<&iters,.top640
mend
macro
rawDec &mode,&unroll
lclc &loopBdy
lcla &bytePix
* Shared body of the raw pixel decoding routines.
*   mode   - appended to the text loopBody to pick the per-pixel macro
*            (640 selects loopBody640, 320 selects loopBody320).
*   unroll - how many decode steps the main loop emits per pass.
* bytePix is mode/160 (4 for 640, 2 for 320) and is the amount added to Y
* for every output byte produced.
* Outline: set up direct page and data bank, pull the arguments off the
* stack, run the unrolled loop (loop1) while a full group still fits, finish
* with the one-at-a-time loop (loop2), then restore state and return.
&loopBdy setc 'loopBody'+&mode
&bytePix seta &mode/160
tdc
tax
lda |dpPtr
tcd set new direct page
stx oldDP save direct page on entry
phb
phb
pla
sta oldDB save data bank on entry
lda 10,S
pha leaves extra byte: clean up later
plb initialize data bank=bank of lineDataPtr
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
tay
pla move return address to proper position
sta 8-1,S
pla
sta 10-3,S
plx
stx destOfst initialize x = destOfst = startOffset
pla
sta endOfst initialize endOfst = endOffset
* loop1End is endOffset-(unroll-1); the 7 in the two comments below is that
* value for an unroll factor of 8. If the subtraction borrows, the unrolled
* loop is skipped entirely and control goes straight to the test for loop2.
sec
sbc #&unroll-1
bcs doLoop1 if endOffset-7 did not underflow...
jmp test2
doLoop1 sta loop1End initialize loop1End = endOffset - 7
txa a = startOffset
jmp test1
* Unrolled loop: the decode steps run with the accumulator switched to 8 bits
* (sep #$20) and it is switched back to 16 bits (rep #$20) for the pointer
* arithmetic that follows.
loop1 anop
sep #$20
longa off
&loopBdy &unroll
rep #$20
longa on
* No clc is needed before the adc instructions below: loop1 is only entered
* by falling through the bge at test1 (carry clear), the decode steps do not
* change carry, and incDB returns with carry cleared.
tya
adc #&unroll*&bytePix carry must be clear
tay
bcs incDB1
cont1 txa
adc #&unroll carry must be clear
sta destOfst
test1 cmp loop1End
bge check2
jmp loop1
check2 cmp endOfst
bge end
* Remainder loop: one decode step per pass until X reaches endOfst. It is
* entered either by falling through the bge above or via the blt at test2,
* so carry is clear at the adc here as well.
loop2 anop
&loopBdy 1
inx
stx destOfst
tya
adc #&bytePix carry must be clear
tay
bcs incDB2
test2 cpx endOfst
blt loop2
* Exit: hand back the advanced input pointer (bank in X, low 16 bits in A)
* and restore the data bank and direct page saved on entry.
* NOTE(review): phb pushes one byte; if the index registers are 16 bits wide
* here, plx pulls two, which would also discard the extra byte left on the
* stack during setup - confirm.
end phb
plx x = old DB (high byte of lineDataPtr)
pei (oldDB)
plb restore data bank
plb
lda oldDP
tcd restore direct page
tya a = lineDataPtr (low 16 bits)
rtl
* Reached when adding to Y carried out of 16 bits. pea pushes the address
* that the rts at the end of incDB resumes at (cont1 or test2); incDB itself
* bumps the data bank register by one and clears carry for the caller.
incDB1 pea cont1-1
bra incDB
incDB2 pea test2-1
incDB phb
pla
inc A
pha
plb
clc
rts
mend mend