Switch to using faster assembly routine (instead of C) for 640-mode pixel decoding loop.

This commit is contained in:
Stephen Heumann 2016-05-14 17:30:44 -05:00
parent 12999358b5
commit 9e401f4a83
5 changed files with 149 additions and 52 deletions

4
make
View File

@ -26,7 +26,7 @@ for file in vncview vncsession vncdisplay colortables \
end
end
for file in tables
for file in tables rawdec
unset exit
newer {file}.a {file}.asm
if {status} != 0
@ -45,7 +45,7 @@ if {status} != 0
end
if {link} == true
link vncview vncsession vncdisplay colortables tables \
link vncview vncsession vncdisplay colortables tables rawdec \
desktopsize mouse keyboard copyrect raw hextile clipboard \
keep=VNCview.GS
filetype VNCview.GS S16 $DB03

54
raw.cc
View File

@ -47,6 +47,9 @@ extern unsigned char destBuf[];
static unsigned char *destPtr;
/* Assembly routine (see rawdec.asm) that generates 640-mode pixels in
 * destBuf, from offset startOffset up to (but not including) endOffset,
 * using raw pixel data read from lineDataPtr.
 * Returns the next lineDataPtr value.
 */
unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
unsigned char *lineDataPtr);
/* Ends drawing of a raw rectangle when it is complete or aborted
* because the rectangle is not visible.
*/
@ -116,55 +119,8 @@ void RawDraw (void) {
finalDestPtr = destPtr + lineBytes - 1;
if (hRez == 640) {
initialLineDataPtr = lineDataPtr;
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */
*(unsigned*)destPtr =
outPixels = (*(unsigned*)(bigcoltab640a + (inPixelsA = *(unsigned*)lineDataPtr))
^ *(unsigned*)(bigcoltab640b + (inPixelsB = ((unsigned*)lineDataPtr)[1])));
*(unsigned*)(destPtr+1) =
((inPixelsA ^ (inPixelsA2 = ((unsigned*)lineDataPtr)[2])) |
(inPixelsB ^ (inPixelsB2 = ((unsigned*)lineDataPtr)[3]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA2)
^ *(unsigned*)(bigcoltab640b + inPixelsB2)));
*(unsigned*)(destPtr+2) =
((inPixelsA2 ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) |
(inPixelsB2 ^ (inPixelsB = ((unsigned*)lineDataPtr)[5]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA)
^ *(unsigned*)(bigcoltab640b + inPixelsB)));
*(unsigned*)(destPtr+3) =
((inPixelsA ^ (inPixelsA2 = ((unsigned*)lineDataPtr)[6])) |
(inPixelsB ^ (inPixelsB2 = ((unsigned*)lineDataPtr)[7]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA2)
^ *(unsigned*)(bigcoltab640b + inPixelsB2)));
*(unsigned*)(destPtr+4) =
((inPixelsA2 ^ (inPixelsA = ((unsigned*)lineDataPtr)[8])) |
(inPixelsB2 ^ (inPixelsB = ((unsigned*)lineDataPtr)[9]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA)
^ *(unsigned*)(bigcoltab640b + inPixelsB)));
*(unsigned*)(destPtr+5) =
((inPixelsA ^ (inPixelsA2 = ((unsigned*)lineDataPtr)[10])) |
(inPixelsB ^ (inPixelsB2 = ((unsigned*)lineDataPtr)[11]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA2)
^ *(unsigned*)(bigcoltab640b + inPixelsB2)));
*(unsigned*)(destPtr+6) =
((inPixelsA2 ^ (inPixelsA = ((unsigned*)lineDataPtr)[12])) |
(inPixelsB2 ^ (inPixelsB = ((unsigned*)lineDataPtr)[13]))) == 0 ? outPixels :
(outPixels = (*(unsigned*)(bigcoltab640a + inPixelsA)
^ *(unsigned*)(bigcoltab640b + inPixelsB)));
*(destPtr+7) =
((inPixelsA ^ (inPixelsA2 = ((unsigned*)lineDataPtr)[14])) |
(inPixelsB ^ (inPixelsB2 = ((unsigned*)lineDataPtr)[15]))) == 0 ? outPixels :
((*(unsigned*)(bigcoltab640a + inPixelsA2)
^ *(unsigned*)(bigcoltab640b + inPixelsB2)));
destPtr += 8;
lineDataPtr += 32;
}
while (destPtr < finalDestPtr) {
*(destPtr++) =
*(unsigned*)(bigcoltab640a + *(unsigned*)lineDataPtr)
+ *(unsigned*)(bigcoltab640b + ((unsigned*)lineDataPtr)[1]);
lineDataPtr += 4;
}
/* Final byte to produce */
lineDataPtr = rawDecode640(destPtr-destBuf, finalDestPtr-destBuf, lineDataPtr);
destPtr = finalDestPtr;
*destPtr = pixTransTbl[*(lineDataPtr++)] & 0xC0;
for (i = lineDataPtr - initialLineDataPtr; i < rectWidth; i++)
switch (i & 0x03) {

118
rawdec.asm Normal file
View File

@ -0,0 +1,118 @@
mcopy rawdec.macros
gen on
case on
* Pointer to direct page space for use by these routines.
* Four bytes are reserved here. The C code declares this as
* "extern void *dpPtr" and, in main(), stores in it the address of a
* 256-byte block obtained from NewHandle. rawDecode640 loads only the
* low 16 bits of this value when it sets the direct page register.
dpPtr data
ds 4
end
* DP locations
* These are offsets within the direct page that dpPtr points to.
* rawDecode640 switches the direct page register to that page on entry,
* so each of these is accessed with direct-page addressing. All five are
* used as 16-bit values.
destOfst gequ 0 offset into destBuf to start at
endOfst gequ 2 offset into destBuf to end before
loop1End gequ 4 offset into destBuf to end loop 1
oldDB gequ 6 data bank on entry
oldDP gequ 8 direct page on entry
* Generates 640-mode SHR pixels [destBuf+startOffset, destBuf+endOffset)
* from raw pixels starting at lineDataPtr. Returns next lineDataPtr value.
*
* Each output byte is produced from 4 input bytes, read as two 16-bit
* words that index BCT640A and BCT640B; the two table values are ORed
* together (this is what each expansion of loopBody640 does).
*
* First loop is unrolled to minimize index calculation overhead,
* and runs until fewer than 8 iterations (output bytes) remain.
* Second loop generates the remaining bytes.
*
* unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
* unsigned char *lineDataPtr);
*
* Stack on entry (after the JSL), as used by the code below:
*   1-3,S  return address
*   4,S    startOffset
*   6,S    endOffset
*   8,S    lineDataPtr (4 bytes; bank byte at 10,S)
* The routine pulls its own parameters before returning, and returns the
* pointer with the bank in X and the low 16 bits in A.
*
* During the loops, Y holds the low 16 bits of lineDataPtr and the data
* bank register holds its bank; the direct page is the one at dpPtr.
*
* NOTE(review): loop 2 runs with a 16-bit accumulator, so each store in
* it writes two bytes and its last iteration also writes the byte at
* destBuf+endOffset. The call site visible in raw.cc overwrites that
* byte right after the call -- confirm this holds for any other caller.
rawDecode640 start rawDec640
unroll equ 8 loop unrolling factor
* Switch to the direct page at dpPtr. The caller's direct page is kept
* in X until the new page is active, then saved in oldDP.
tdc
tax
lda |dpPtr
tcd set new direct page
stx oldDP save direct page on entry
* Pushing the data bank twice and pulling 16 bits puts the bank byte in
* both halves of A; the restore code at "end" relies on this.
phb
phb
pla
sta oldDB save data bank on entry
* Load the high word of lineDataPtr, push it (2 bytes), and pull just
* one byte into the data bank register. The other pushed byte stays on
* the stack, shifting every stack offset below by one.
lda 10,S
pha leaves extra byte: clean up later
plb initialize data bank=bank of lineDataPtr
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
tay
* The extra byte and the 3-byte return address are pulled as two words
* and written over the 4-byte lineDataPtr slot. The return address ends
* up in the upper three bytes of that slot, where RTL will find it once
* the parameters have been pulled; the extra byte sits just below it.
pla move return address to proper position
sta 8-1,S
pla
sta 10-3,S
* Pull the two 16-bit parameters off the stack.
plx
stx destOfst initialize x = destOfst = startOffset
pla
sta endOfst initialize endOfst = endOffset
* Compute endOffset - (unroll-1). If the subtraction borrows, there is
* no room for even one unrolled pass, so go straight to loop 2's test
* (X already holds startOffset).
sec
sbc #unroll-1
bcs doLoop1 if endOffset-7 did not underflow...
jmp test2
doLoop1 sta loop1End initialize loop1End = endOffset - 7
txa a = startOffset
jmp test1
* Loop 1: "unroll" output bytes per pass, using an 8-bit accumulator so
* that each store writes exactly one byte.
loop1 sep #$20
longa off
loopBody640 unroll
rep #$20
longa on
* Advance the input pointer by 4 bytes per output byte generated.
* A carry out of Y means the pointer crossed into the next bank.
tya
clc
adc #unroll*4 carry must be clear
tay
bcs incDB1
* X still holds destOfst here (loopBody640 loads it last); advance it.
cont1 txa
clc
adc #unroll carry must be clear
sta destOfst
* A = current destOfst. Continue loop 1 while destOfst < loop1End.
test1 cmp loop1End
bge check2
jmp loop1
* Loop 1 is finished; skip loop 2 if nothing remains.
check2 cmp endOfst
bge end
* Loop 2: one output byte per pass (see the review note above
* regarding the width of the store).
loop2 anop
loopBody640 1
inx
stx destOfst
tya
clc
adc #4 carry must be clear
tay
bcs incDB2
test2 cpx endOfst
blt loop2
* Exit. phb pushes one byte and plx pulls two, so this also removes the
* extra byte left on the stack during entry: X receives the current
* data bank in its low byte and that extra byte in its high byte.
end phb
plx x = old DB (high byte of lineDataPtr)
* oldDB holds the entry data bank in both bytes, so pushing the word
* and pulling twice leaves the entry bank in the data bank register.
pei (oldDB)
plb restore data bank
plb
lda oldDP
tcd restore direct page
tya a = lineDataPtr (low 16 bits)
rtl
* Bank-crossing helpers. Each pushes the address (minus one) at which
* to resume, so that the shared code at incDB can return with RTS.
incDB1 pea cont1-1
bra incDB
incDB2 pea test2-1
* Increment the data bank register. With a 16-bit accumulator, phb/pla
* pulls the bank byte together with the low byte of the resume address
* pushed above; inc A bumps the bank byte, and pha/plb puts the address
* byte back on the stack while loading the new bank.
incDB phb
pla
inc A
pha
plb
clc
rts
end

13
rawdec.macros Normal file
View File

@ -0,0 +1,13 @@
macro
loopBody640 &iters
lcla &n
* Emits the decoding code for "iters" consecutive output bytes.
* For output byte k (k = 0 .. iters-1) the generated code:
*   - loads X with the 16-bit word at offset 4*k from Y (absolute,Y
*     addressing, so relative to the current data bank) and loads the
*     BCT640A entry at that index,
*   - loads X with the word at offset 4*k+2 from Y and ORs in the
*     BCT640B entry at that index,
*   - reloads X from destOfst and stores the result at destBuf+k+X.
* A and X are clobbered; X holds destOfst afterwards. Y and destOfst
* are not modified, so the caller advances both after the expansion.
* The accumulator width is whatever is in effect where the macro is
* used: the stores write one byte each with an 8-bit accumulator and
* two bytes each with a 16-bit accumulator.
.top
ldx |4*&n,Y
lda >BCT640A,X
ldx |4*&n+2,Y
ora >BCT640B,X
LDX destOfst
STA >destBuf+&n,X
&n seta &n+1
aif &n<&iters,.top
mend

View File

@ -76,7 +76,8 @@ BOOLEAN vncConnected = FALSE; /* are we connected to a VNC host */
int menuOffset; /* Indicates which menu bar is active */
static Ref startStopParm; /* tool start/shutdown parameter */
BOOLEAN colorTablesComplete = FALSE; /* Are the big color tables complete */
static Handle dpHndl; /* direct page space for assembly routines */
extern void *dpPtr; /* pointer to DP for assembly routines */
/* Connection options */
int hRez = 320;
@ -357,6 +358,8 @@ static void Quit (void) {
if (readBufferHndl)
DisposeHandle(readBufferHndl); /* Get rid of TCPIP read buffer hndl */
if (dpHndl)
DisposeHandle(dpHndl);
if (cursor)
free(cursor);
@ -400,6 +403,12 @@ int main (void) {
}
readBufferHndl = NewHandle(1, userid(), 0, NULL);
if (toolerror()) goto oomQuit;
dpHndl = NewHandle(0x0100, userid(),
attrLocked|attrFixed|attrPage|attrNoCross|attrBank, 0x00000000);
if (toolerror()) goto oomQuit;
dpPtr = *dpHndl;
LoadOneTool(54, 0x200); /* load Marinetti 2.0+ */
if (toolerror()) { /* Check that Marinetti is available */
@ -412,6 +421,7 @@ int main (void) {
TCPIPStartUp(); /* ... so activate it now */
if (toolerror()) { /* Get handle for TCPIP read buffer */
oomQuit:
SysBeep();
InitCursor();
AlertWindow(awResource, NULL, outOfMemoryError);