Optimize 320-mode raw pixel decoding for the case where two consecutive pairs of pixels are the same.

This is only a win if we can use the optimized case a reasonable proportion of the time (~40% or more), but that should be the case for most real screen images.

The equality comparisons are written with XORs because that produces better assembly code.
This commit is contained in:
Stephen Heumann 2015-09-28 19:47:40 -05:00
parent 13cd339277
commit 2f13669a2f
1 changed file with 17 additions and 9 deletions

26
raw.cc
View File

@@ -158,22 +158,30 @@ void RawDraw (void) {
}
else { /* 320 mode */
while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */
unsigned inPixelsA, inPixelsB, outPixels;
*(unsigned*)destPtr =
*(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr);
outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr));
*(unsigned*)(destPtr+1) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[1]);
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+2) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[2]);
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+3) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[3]);
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+4) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[4]);
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(unsigned*)(destPtr+5) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[5]);
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsB));
*(unsigned*)(destPtr+6) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[6]);
* (destPtr+7) =
*(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[7]);
(inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels :
(outPixels = *(unsigned*)(bigcoltab320 + inPixelsA));
*(destPtr+7) =
(inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels :
*(unsigned*)(bigcoltab320 + inPixelsB);
destPtr += 8;
lineDataPtr += 16;
}