Optimize 320-mode raw pixel decoding for the case where two consecutive pairs of pixels are the same.

This is only a win if the optimized path can be taken a reasonable proportion of the time (~40% or more), but that should hold for most real screen images.

The equality comparisons are written with XORs (i.e. `(a ^ b) == 0` rather than `a == b`) because that produces better assembly code.
This commit is contained in:
Stephen Heumann 2015-09-28 19:47:40 -05:00
parent 13cd339277
commit 2f13669a2f

24
raw.cc
View File

@ -158,22 +158,30 @@ void RawDraw (void) {
} }
else { /* 320 mode */ else { /* 320 mode */
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */ while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */
unsigned inPixelsA, inPixelsB, outPixels;
*(unsigned*)destPtr = *(unsigned*)destPtr =
*(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr); outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
*(unsigned*)(destPtr+1) = *(unsigned*)(destPtr+1) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[1]); (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+2) = *(unsigned*)(destPtr+2) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[2]); (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+3) = *(unsigned*)(destPtr+3) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[3]); (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+4) = *(unsigned*)(destPtr+4) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[4]); (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+5) = *(unsigned*)(destPtr+5) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[5]); (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+6) = *(unsigned*)(destPtr+6) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[6]); (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(destPtr+7) = *(destPtr+7) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[7]); (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
*(unsigned*)(bigcoltab320 + inPixelsB);
destPtr += 8; destPtr += 8;
lineDataPtr += 16; lineDataPtr += 16;
} }