mirror of
https://github.com/sheumann/VNCviewGS.git
synced 2024-11-24 00:30:46 +00:00
Use faster assembly routine for 320-mode raw pixel decoding too.
The assembly code for the 640-mode and 320-mode routines is moved to a macro so it can be shared between them.
This commit is contained in:
parent
9e401f4a83
commit
aa38f6d209
37
raw.cc
37
raw.cc
@ -49,6 +49,9 @@ static unsigned char *destPtr;
|
||||
|
||||
/* Raw pixel decoders implemented in assembly (see rawdec.asm), one per
 * display mode.  startOffset and endOffset are byte offsets into destBuf;
 * lineDataPtr is the current position in the incoming pixel data.  The
 * return value is the advanced lineDataPtr, which the caller stores back.
 */
unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
|
||||
unsigned char *lineDataPtr);
|
||||
unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
|
||||
unsigned char *lineDataPtr);
|
||||
|
||||
|
||||
/* Ends drawing of a raw rectangle when it is complete or aborted
|
||||
* because the rectangle is not visible.
|
||||
@ -136,38 +139,8 @@ void RawDraw (void) {
|
||||
destPtr++;
|
||||
}
|
||||
else { /* 320 mode */
|
||||
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */
|
||||
*(unsigned*)destPtr =
|
||||
outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
|
||||
*(unsigned*)(destPtr+1) =
|
||||
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
|
||||
*(unsigned*)(destPtr+2) =
|
||||
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
|
||||
*(unsigned*)(destPtr+3) =
|
||||
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
|
||||
*(unsigned*)(destPtr+4) =
|
||||
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
|
||||
*(unsigned*)(destPtr+5) =
|
||||
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
|
||||
*(unsigned*)(destPtr+6) =
|
||||
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
|
||||
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
|
||||
*(destPtr+7) =
|
||||
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
|
||||
*(unsigned*)(bigcoltab320 + inPixelsB);
|
||||
destPtr += 8;
|
||||
lineDataPtr += 16;
|
||||
}
|
||||
while (destPtr < finalDestPtr) {
|
||||
*(destPtr++) =
|
||||
*(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr);
|
||||
lineDataPtr += 2;
|
||||
}
|
||||
lineDataPtr = rawDecode320(destPtr-destBuf, finalDestPtr-destBuf, lineDataPtr);
|
||||
destPtr = finalDestPtr;
|
||||
/* Final byte to produce */
|
||||
*destPtr = pixTransTbl[*(lineDataPtr++)] & 0xF0;
|
||||
if (extraByteAdvance)
|
||||
|
97
rawdec.asm
97
rawdec.asm
@ -24,95 +24,10 @@ oldDP gequ 8 direct page on entry
|
||||
* unsigned char * rawDecode640(unsigned startOffset, unsigned endOffset,
|
||||
* unsigned char *lineDataPtr);
|
||||
rawDecode640 start rawDec640
|
||||
unroll equ 8 loop unrolling factor
|
||||
|
||||
tdc
|
||||
tax
|
||||
lda |dpPtr
|
||||
tcd set new direct page
|
||||
stx oldDP save direct page on entry
|
||||
|
||||
phb
|
||||
phb
|
||||
pla
|
||||
sta oldDB save data bank on entry
|
||||
|
||||
lda 10,S
|
||||
pha leaves extra byte: clean up later
|
||||
plb initialize data bank=bank of lineDataPtr
|
||||
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
|
||||
tay
|
||||
|
||||
pla move return address to proper position
|
||||
sta 8-1,S
|
||||
pla
|
||||
sta 10-3,S
|
||||
|
||||
plx
|
||||
stx destOfst initialize x = destOfst = startOffset
|
||||
|
||||
pla
|
||||
sta endOfst initialize endOfst = endOffset
|
||||
|
||||
sec
|
||||
sbc #unroll-1
|
||||
bcs doLoop1 if endOffset-7 did not underflow...
|
||||
jmp test2
|
||||
|
||||
doLoop1 sta loop1End initialize loop1End = endOffset - 7
|
||||
txa a = startOffset
|
||||
jmp test1
|
||||
|
||||
loop1 sep #$20
|
||||
longa off
|
||||
loopBody640 unroll
|
||||
rep #$20
|
||||
longa on
|
||||
tya
|
||||
clc
|
||||
adc #unroll*4 carry must be clear
|
||||
tay
|
||||
bcs incDB1
|
||||
cont1 txa
|
||||
clc
|
||||
adc #unroll carry must be clear
|
||||
sta destOfst
|
||||
test1 cmp loop1End
|
||||
bge check2
|
||||
jmp loop1
|
||||
|
||||
check2 cmp endOfst
|
||||
bge end
|
||||
loop2 anop
|
||||
loopBody640 1
|
||||
inx
|
||||
stx destOfst
|
||||
tya
|
||||
clc
|
||||
adc #4 carry must be clear
|
||||
tay
|
||||
bcs incDB2
|
||||
test2 cpx endOfst
|
||||
blt loop2
|
||||
|
||||
end phb
|
||||
plx x = old DB (high byte of lineDataPtr)
|
||||
pei (oldDB)
|
||||
plb restore data bank
|
||||
plb
|
||||
lda oldDP
|
||||
tcd restore direct page
|
||||
tya a = lineDataPtr (low 16 bits)
|
||||
rtl
|
||||
|
||||
incDB1 pea cont1-1
|
||||
bra incDB
|
||||
incDB2 pea test2-1
|
||||
incDB phb
|
||||
pla
|
||||
inc A
|
||||
pha
|
||||
plb
|
||||
clc
|
||||
rts
|
||||
rawdec 640,8
|
||||
end
|
||||
|
||||
* unsigned char * rawDecode320(unsigned startOffset, unsigned endOffset,
*                              unsigned char *lineDataPtr);
* Same for 320-mode: the routine body is generated by the rawdec macro,
* invoked with mode 320 and an unroll factor of 8.
|
||||
rawDecode320 start rawDec320
|
||||
rawdec 320,8
|
||||
end
|
||||
|
120
rawdec.macros
120
rawdec.macros
@ -1,13 +1,125 @@
|
||||
* loopBody320 &iters
*   Emits the 320-mode decode step &iters times (n = 0 .. &iters-1).
*   Step n loads X from offset 2*n past Y (absolute addressing, so in the
*   current data bank), loads the accumulator from BCT320 indexed by that
*   value, reloads X from destOfst and stores the accumulator at
*   destBuf+n indexed by X.
*   On exit X = destOfst.  Neither Y nor destOfst is changed here; the
*   code that invokes the macro is responsible for advancing them.
*   The sequence uses only loads and a store, so the carry flag is left
*   as it was on entry.
macro
|
||||
loopBody320 &iters
|
||||
lcla &n
|
||||
.top320
|
||||
ldx |2*&n,Y
|
||||
lda >BCT320,X
|
||||
ldx destOfst
|
||||
sta >destBuf+&n,X
|
||||
&n seta &n+1
|
||||
aif &n<&iters,.top320
|
||||
mend
|
||||
|
||||
|
||||
macro
|
||||
loopBody640 &iters
|
||||
lcla &n
|
||||
.top
|
||||
.top640
|
||||
ldx |4*&n,Y
|
||||
lda >BCT640A,X
|
||||
ldx |4*&n+2,Y
|
||||
ora >BCT640B,X
|
||||
LDX destOfst
|
||||
STA >destBuf+&n,X
|
||||
ldx destOfst
|
||||
sta >destBuf+&n,X
|
||||
&n seta &n+1
|
||||
aif &n<&iters,.top
|
||||
aif &n<&iters,.top640
|
||||
|
||||
mend
|
||||
|
||||
|
||||
* rawDec &mode,&unroll
*   Generates the whole body of a raw pixel decode routine whose C
*   prototype has the form
*     unsigned char * name(unsigned startOffset, unsigned endOffset,
*                          unsigned char *lineDataPtr);
*   &mode   picks the per-byte code: the macro named loopBody&mode is
*           invoked, and &mode/160 input bytes are consumed per output
*           byte (4 for mode 640, 2 for mode 320).
*   &unroll is the number of output bytes produced per pass of loop1.
*   Output bytes are produced for destBuf offsets startOffset up to, but
*   not including, endOffset.  The advanced lineDataPtr is returned with
*   its low 16 bits in A and its bank byte in X.
*   The code moves 16-bit values with lda/pla/sta at entry, so it
*   presumably expects to be entered with 16-bit registers -- confirm
*   against the callers' conventions.
macro
|
||||
rawDec &mode,&unroll
|
||||
lclc &loopBdy
|
||||
lcla &bytePix
|
||||
&loopBdy setc 'loopBody'+&mode
|
||||
&bytePix seta &mode/160
|
||||
|
||||
* Switch to the direct page whose address is stored at dpPtr; the
* caller's direct page is kept in oldDP (a location in the new page).
tdc
|
||||
tax
|
||||
lda |dpPtr
|
||||
tcd set new direct page
|
||||
stx oldDP save direct page on entry
|
||||
|
||||
* The data bank register is pushed twice so that a 16-bit pull can
* fetch it without disturbing the rest of the stack.
phb
|
||||
phb
|
||||
pla
|
||||
sta oldDB save data bank on entry
|
||||
|
||||
* pha pushes two bytes but plb pulls only one, so one extra byte stays
* on the stack; that is why the next stack offset is written as 8+1.
lda 10,S
|
||||
pha leaves extra byte: clean up later
|
||||
plb initialize data bank=bank of lineDataPtr
|
||||
lda 8+1,S initialize y = lineDataPtr (low 16 bits)
|
||||
tay
|
||||
|
||||
* Pull the extra byte together with the 3-byte return address and store
* them over the lineDataPtr argument, so that the plx/pla below pop
* startOffset and endOffset and the final rtl still finds its address.
pla move return address to proper position
|
||||
sta 8-1,S
|
||||
pla
|
||||
sta 10-3,S
|
||||
|
||||
plx
|
||||
stx destOfst initialize x = destOfst = startOffset
|
||||
|
||||
pla
|
||||
sta endOfst initialize endOfst = endOffset
|
||||
|
||||
* loop1End = endOffset - (&unroll-1).  The "7" in the comments below is
* that value for an unroll factor of 8.  If the subtraction borrows,
* endOffset is too small for any unrolled pass, so control goes straight
* to the one-byte-at-a-time loop.
sec
|
||||
sbc #&unroll-1
|
||||
bcs doLoop1 if endOffset-7 did not underflow...
|
||||
jmp test2
|
||||
|
||||
doLoop1 sta loop1End initialize loop1End = endOffset - 7
|
||||
txa a = startOffset
|
||||
jmp test1
|
||||
|
||||
* Main loop: &unroll output bytes per pass, with the accumulator
* switched to 8 bits around the loop body.  loop1 is reached only from
* test1 when the cmp found A < loop1End, i.e. with carry clear.  The
* loop bodies visible in this file use only loads, ora and stores, so
* carry is still clear at the first adc; if that adc carries out,
* incDB1 clears carry again before resuming at cont1.
loop1 anop
|
||||
sep #$20
|
||||
longa off
|
||||
&loopBdy &unroll
|
||||
rep #$20
|
||||
longa on
|
||||
tya
|
||||
adc #&unroll*&bytePix carry must be clear
|
||||
tay
|
||||
bcs incDB1
|
||||
cont1 txa
|
||||
adc #&unroll carry must be clear
|
||||
sta destOfst
|
||||
test1 cmp loop1End
|
||||
bge check2
|
||||
jmp loop1
|
||||
|
||||
* Tail loop: one output byte per pass for whatever remains.  It is
* entered with carry clear, either by falling through the bge below or
* via the blt at test2.
* NOTE(review): there is no sep #$20 here, so the loop body presumably
* runs with a 16-bit accumulator and moves two bytes per pass; confirm
* that the extra byte written after the last pass is harmless.
check2 cmp endOfst
|
||||
bge end
|
||||
loop2 anop
|
||||
&loopBdy 1
|
||||
inx
|
||||
stx destOfst
|
||||
tya
|
||||
adc #&bytePix carry must be clear
|
||||
tay
|
||||
bcs incDB2
|
||||
test2 cpx endOfst
|
||||
blt loop2
|
||||
|
||||
* Return sequence.  phb/plx puts the current data bank in the low byte
* of X and also removes the extra byte left on the stack at entry.  The
* caller's data bank and direct page are then restored.
end phb
|
||||
plx x = old DB (high byte of lineDataPtr)
|
||||
pei (oldDB)
|
||||
plb restore data bank
|
||||
plb
|
||||
lda oldDP
|
||||
tcd restore direct page
|
||||
tya a = lineDataPtr (low 16 bits)
|
||||
rtl
|
||||
|
||||
* Reached when adding to Y carried out of 16 bits: the input pointer
* has crossed into the next bank, so the data bank register is
* incremented.  pea pushes (resume address - 1) so that the rts at the
* end of incDB continues at cont1 or test2.  Carry is cleared before
* returning, as the following adc / loop entry requires.
incDB1 pea cont1-1
|
||||
bra incDB
|
||||
incDB2 pea test2-1
|
||||
incDB phb
|
||||
pla
|
||||
inc A
|
||||
pha
|
||||
plb
|
||||
clc
|
||||
rts
|
||||
|
||||
mend
|
||||
|
Loading…
Reference in New Issue
Block a user