From 2f13669a2f6869930dca558ba6806cbc9da6f133 Mon Sep 17 00:00:00 2001 From: Stephen Heumann Date: Mon, 28 Sep 2015 19:47:40 -0500 Subject: [PATCH] Optimize 320-mode raw pixel decoding for the case where two consecutive pairs of pixels are the same. This is only a win if we can use the optimized case a reasonable proportion of the time (~40% or more), but that should be the case for most real screen images. The equality comparisons are written with XORs because that produces better assembly code. --- raw.cc | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/raw.cc b/raw.cc index a8fd5db..b10fff1 100644 --- a/raw.cc +++ b/raw.cc @@ -158,22 +158,30 @@ void RawDraw (void) { } else { /* 320 mode */ while (destPtr + 7 < finalDestPtr) { /* Unrolled loop */ + unsigned inPixelsA, inPixelsB, outPixels; *(unsigned*)destPtr = - *(unsigned*)(bigcoltab320 + *(unsigned*)lineDataPtr); + outPixels = *(unsigned*)(bigcoltab320 + (inPixelsA = *(unsigned*)lineDataPtr)); *(unsigned*)(destPtr+1) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[1]); + (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[1])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB)); *(unsigned*)(destPtr+2) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[2]); + (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[2])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA)); *(unsigned*)(destPtr+3) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[3]); + (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[3])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB)); *(unsigned*)(destPtr+4) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[4]); + (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[4])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA)); *(unsigned*)(destPtr+5) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[5]); + (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[5])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsB)); *(unsigned*)(destPtr+6) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[6]); - * (destPtr+7) = - *(unsigned*)(bigcoltab320 + ((unsigned*)lineDataPtr)[7]); + (inPixelsB ^ (inPixelsA = ((unsigned*)lineDataPtr)[6])) == 0 ? outPixels : + (outPixels = *(unsigned*)(bigcoltab320 + inPixelsA)); + *(destPtr+7) = + (inPixelsA ^ (inPixelsB = ((unsigned*)lineDataPtr)[7])) == 0 ? outPixels : + *(unsigned*)(bigcoltab320 + inPixelsB); destPtr += 8; lineDataPtr += 16; }