gzip: speed up and shrink put_16bit()

function                                             old     new   delta
put_16bit                                            104      98      -6

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
Author: Denys Vlasenko
Date: 2015-02-02 16:07:07 +01:00
Parent: 7f7ade1964
Commit: f7f70bf1b3
2 changed files with 36 additions and 7 deletions

archival/gzip.c:

@@ -417,19 +417,46 @@ static void flush_outbuf(void)
 #define put_8bit(c) \
 do { \
 	G1.outbuf[G1.outcnt++] = (c); \
-	if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \
+	if (G1.outcnt == OUTBUFSIZ) \
+		flush_outbuf(); \
 } while (0)
 
 /* Output a 16 bit value, lsb first */
 static void put_16bit(ush w)
 {
-	if (G1.outcnt < OUTBUFSIZ - 2) {
-		G1.outbuf[G1.outcnt++] = w;
-		G1.outbuf[G1.outcnt++] = w >> 8;
-	} else {
-		put_8bit(w);
-		put_8bit(w >> 8);
+	/* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt
+	 * (probably because of fear of aliasing with G1.outbuf[]
+	 * stores), do it explicitly:
+	 */
+	unsigned outcnt = G1.outcnt;
+	uch *dst = &G1.outbuf[outcnt];
+
+#if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN
+	if (outcnt < OUTBUFSIZ-2) {
+		/* Common case */
+		ush *dst16 = (void*) dst;
+		*dst16 = w; /* unaligned LSB 16-bit store */
+		G1.outcnt = outcnt + 2;
+		return;
 	}
+	*dst = (uch)w;
+	w >>= 8;
+#else
+	*dst++ = (uch)w;
+	w >>= 8;
+	if (outcnt < OUTBUFSIZ-2) {
+		/* Common case */
+		*dst = w;
+		G1.outcnt = outcnt + 2;
+		return;
+	}
+#endif
+
+	/* Slowpath: we will need to do flush_outbuf() */
+	G1.outcnt++;
+	if (G1.outcnt == OUTBUFSIZ)
+		flush_outbuf();
+	put_8bit(w);
 }
 
 static void put_32bit(ulg n)
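The fast path above relies on two properties declared by the preprocessor guard: the target tolerates unaligned stores, and it is little-endian, so a single 16-bit store lays down the same bytes as the two LSB-first 8-bit stores it replaces. A minimal standalone sketch of that equivalence (not from the busybox tree; the buffer, offset and value are made up for illustration):

#include <assert.h>
#include <stdint.h>
#include <string.h>

int main(void)
{
	uint16_t w = 0xBEEF;
	unsigned char two_stores[4] = { 0 };
	unsigned char one_store[4]  = { 0 };

	/* Old path: two 8-bit stores, LSB first, at a deliberately odd offset. */
	two_stores[1] = (unsigned char)w;
	two_stores[2] = (unsigned char)(w >> 8);

	/* New fast path: one possibly-unaligned 16-bit store. Safe only where
	 * unaligned access is tolerated (the BB_UNALIGNED_MEMACCESS_OK case)
	 * and byte-identical to the old path only on little-endian targets
	 * (the BB_LITTLE_ENDIAN case). */
	uint16_t *p = (uint16_t *)(void *)&one_store[1];
	*p = w;

	assert(memcmp(two_stores, one_store, 4) == 0);
	return 0;
}

On a big-endian machine the final assert would fail, which is why the #if in put_16bit() requires BB_LITTLE_ENDIAN in addition to BB_UNALIGNED_MEMACCESS_OK.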

include/platform.h:

@@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
  * a lvalue. This makes it more likely to not swap them by mistake
  */
 #if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
+# define BB_UNALIGNED_MEMACCESS_OK 1
 # define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
 # define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
 # define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
@@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
 # define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
 /* #elif ... - add your favorite arch today! */
 #else
+# define BB_UNALIGNED_MEMACCESS_OK 0
 /* performs reasonably well (gcc usually inlines memcpy here) */
 # define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
 # define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
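The "/* #elif ... - add your favorite arch today! */" comment invites per-architecture entries. A hedged sketch of what such an entry might look like (hypothetical: aarch64 is only an example and is not part of this commit; only the macros visible in the context above are spelled out):

#elif defined(__aarch64__)  /* hypothetical example entry, not in this commit */
# define BB_UNALIGNED_MEMACCESS_OK 1
# define move_from_unaligned_int(v, intp)   ((v) = *(bb__aliased_int*)(intp))
# define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
# define move_from_unaligned16(v, u16p)     ((v) = *(bb__aliased_uint16_t*)(u16p))
# define move_to_unaligned32(u32p, v)       (*(bb__aliased_uint32_t*)(u32p) = (v))
/* ...plus the remaining move_* macros from the same block */

Any architecture not listed keeps the memcpy-based fallback and gets BB_UNALIGNED_MEMACCESS_OK as 0, so put_16bit() simply compiles its byte-at-a-time branch there.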