decompress_bunzip2: keep bd->writeCRC in CPU reg in the hot loop

-5 bytes on 64-bit, +7 bytes on 32-bit.

Signed-off-by: Denys Vlasenko <dvlasenk@redhat.com>
This commit is contained in:
Denys Vlasenko 2010-10-29 18:16:29 +02:00
parent f29a1c5656
commit bf3bec51fc

View File

@@ -492,15 +492,20 @@ static int get_next_block(bunzip_data *bd)
int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len) int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
{ {
const uint32_t *dbuf; const uint32_t *dbuf;
int pos, current, previous, gotcount; int pos, current, previous, out_count;
uint32_t CRC;
/* If last read was short due to end of file, return last block now */ /* If we already have error/end indicator, return it */
if (bd->writeCount < 0) return bd->writeCount; if (bd->writeCount < 0)
return bd->writeCount;
gotcount = 0; out_count = 0;
dbuf = bd->dbuf; dbuf = bd->dbuf;
/* Register-cached state (hopefully): */
pos = bd->writePos; pos = bd->writePos;
current = bd->writeCurrent; current = bd->writeCurrent;
CRC = bd->writeCRC; /* small loss on x86-32 (not enough regs), win on x86-64 */
/* We will always have pending decoded data to write into the output /* We will always have pending decoded data to write into the output
buffer unless this is the very first call (in which case we haven't buffer unless this is the very first call (in which case we haven't
@@ -514,8 +519,8 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
/* Loop outputting bytes */ /* Loop outputting bytes */
for (;;) { for (;;) {
/* If the output buffer is full, snapshot state and return */ /* If the output buffer is full, save cached state and return */
if (gotcount >= len) { if (out_count >= len) {
/* Unlikely branch. /* Unlikely branch.
* Use of "goto" instead of keeping code here * Use of "goto" instead of keeping code here
* helps compiler to realize this. */ * helps compiler to realize this. */
@@ -523,17 +528,16 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
} }
/* Write next byte into output buffer, updating CRC */ /* Write next byte into output buffer, updating CRC */
outbuf[gotcount++] = current; outbuf[out_count++] = current;
bd->writeCRC = (bd->writeCRC << 8) CRC = (CRC << 8) ^ bd->crc32Table[(CRC >> 24) ^ current];
^ bd->crc32Table[(bd->writeCRC >> 24) ^ current];
/* Loop now if we're outputting multiple copies of this byte */ /* Loop now if we're outputting multiple copies of this byte */
if (bd->writeCopies) { if (bd->writeCopies) {
/* Unlikely branch */ /* Unlikely branch */
/*--bd->writeCopies;*/ /*--bd->writeCopies;*/
/*continue;*/ /*continue;*/
/* Same, but (ab)using other existing --writeCopies operation. /* Same, but (ab)using other existing --writeCopies operation
* Luckily, this also compiles into just one branch insn: */ * (and this if() compiles into just test+branch pair): */
goto dec_writeCopies; goto dec_writeCopies;
} }
decode_next_byte: decode_next_byte:
@@ -549,7 +553,7 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
/* After 3 consecutive copies of the same byte, the 4th /* After 3 consecutive copies of the same byte, the 4th
* is a repeat count. We count down from 4 instead * is a repeat count. We count down from 4 instead
* of counting up because testing for non-zero is faster */ * of counting up because testing for non-zero is faster */
if (--bd->writeRunCountdown) { if (--bd->writeRunCountdown != 0) {
if (current != previous) if (current != previous)
bd->writeRunCountdown = 4; bd->writeRunCountdown = 4;
} else { } else {
@@ -568,11 +572,11 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
} /* for(;;) */ } /* for(;;) */
/* Decompression of this input block completed successfully */ /* Decompression of this input block completed successfully */
bd->writeCRC = ~bd->writeCRC; bd->writeCRC = CRC = ~CRC;
bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ bd->writeCRC; bd->totalCRC = ((bd->totalCRC << 1) | (bd->totalCRC >> 31)) ^ CRC;
/* If this block had a CRC error, force file level CRC error. */ /* If this block had a CRC error, force file level CRC error */
if (bd->writeCRC != bd->headerCRC) { if (CRC != bd->headerCRC) {
bd->totalCRC = bd->headerCRC + 1; bd->totalCRC = bd->headerCRC + 1;
return RETVAL_LAST_BLOCK; return RETVAL_LAST_BLOCK;
} }
@@ -581,23 +585,26 @@ int FAST_FUNC read_bunzip(bunzip_data *bd, char *outbuf, int len)
/* Refill the intermediate buffer by Huffman-decoding next block of input */ /* Refill the intermediate buffer by Huffman-decoding next block of input */
{ {
int r = get_next_block(bd); int r = get_next_block(bd);
if (r) { if (r) { /* error/end */
bd->writeCount = r; bd->writeCount = r;
return (r != RETVAL_LAST_BLOCK) ? r : gotcount; return (r != RETVAL_LAST_BLOCK) ? r : out_count;
} }
} }
bd->writeCRC = ~0; CRC = ~0;
pos = bd->writePos; pos = bd->writePos;
current = bd->writeCurrent; current = bd->writeCurrent;
goto decode_next_byte; goto decode_next_byte;
outbuf_full: outbuf_full:
/* Output buffer is full, snapshot state and return */ /* Output buffer is full, save cached state and return */
bd->writePos = pos; bd->writePos = pos;
bd->writeCurrent = current; bd->writeCurrent = current;
bd->writeCRC = CRC;
bd->writeCopies++; bd->writeCopies++;
return gotcount;
return out_count;
} }
/* Allocate the structure, read file header. If in_fd==-1, inbuf must contain /* Allocate the structure, read file header. If in_fd==-1, inbuf must contain