mirror of
https://github.com/sheumann/hush.git
synced 2024-12-23 05:29:58 +00:00
gzip: speed up and shrink put_16bit()
function                                             old     new   delta
put_16bit                                            104      98      -6

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
parent
7f7ade1964
commit
f7f70bf1b3
@ -417,19 +417,46 @@ static void flush_outbuf(void)
|
|||||||
#define put_8bit(c) \
|
#define put_8bit(c) \
|
||||||
do { \
|
do { \
|
||||||
G1.outbuf[G1.outcnt++] = (c); \
|
G1.outbuf[G1.outcnt++] = (c); \
|
||||||
if (G1.outcnt == OUTBUFSIZ) flush_outbuf(); \
|
if (G1.outcnt == OUTBUFSIZ) \
|
||||||
|
flush_outbuf(); \
|
||||||
} while (0)
|
} while (0)
|
||||||
|
|
||||||
/* Output a 16 bit value, lsb first */
|
/* Output a 16 bit value, lsb first */
|
||||||
static void put_16bit(ush w)
|
static void put_16bit(ush w)
|
||||||
{
|
{
|
||||||
if (G1.outcnt < OUTBUFSIZ - 2) {
|
/* GCC 4.2.1 won't optimize out redundant loads of G1.outcnt
|
||||||
G1.outbuf[G1.outcnt++] = w;
|
* (probably because of fear of aliasing with G1.outbuf[]
|
||||||
G1.outbuf[G1.outcnt++] = w >> 8;
|
* stores), do it explicitly:
|
||||||
} else {
|
*/
|
||||||
put_8bit(w);
|
unsigned outcnt = G1.outcnt;
|
||||||
put_8bit(w >> 8);
|
uch *dst = &G1.outbuf[outcnt];
|
||||||
|
|
||||||
|
#if BB_UNALIGNED_MEMACCESS_OK && BB_LITTLE_ENDIAN
|
||||||
|
if (outcnt < OUTBUFSIZ-2) {
|
||||||
|
/* Common case */
|
||||||
|
ush *dst16 = (void*) dst;
|
||||||
|
*dst16 = w; /* unalinged LSB 16-bit store */
|
||||||
|
G1.outcnt = outcnt + 2;
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
*dst = (uch)w;
|
||||||
|
w >>= 8;
|
||||||
|
#else
|
||||||
|
*dst++ = (uch)w;
|
||||||
|
w >>= 8;
|
||||||
|
if (outcnt < OUTBUFSIZ-2) {
|
||||||
|
/* Common case */
|
||||||
|
*dst = w;
|
||||||
|
G1.outcnt = outcnt + 2;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Slowpath: we will need to do flush_outbuf() */
|
||||||
|
G1.outcnt++;
|
||||||
|
if (G1.outcnt == OUTBUFSIZ)
|
||||||
|
flush_outbuf();
|
||||||
|
put_8bit(w);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void put_32bit(ulg n)
|
static void put_32bit(ulg n)
|
||||||
|
@ -217,6 +217,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
|
|||||||
* a lvalue. This makes it more likely to not swap them by mistake
|
* a lvalue. This makes it more likely to not swap them by mistake
|
||||||
*/
|
*/
|
||||||
#if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
|
#if defined(i386) || defined(__x86_64__) || defined(__powerpc__)
|
||||||
|
# define BB_UNALIGNED_MEMACCESS_OK 1
|
||||||
# define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
|
# define move_from_unaligned_int(v, intp) ((v) = *(bb__aliased_int*)(intp))
|
||||||
# define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
|
# define move_from_unaligned_long(v, longp) ((v) = *(bb__aliased_long*)(longp))
|
||||||
# define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
|
# define move_from_unaligned16(v, u16p) ((v) = *(bb__aliased_uint16_t*)(u16p))
|
||||||
@ -225,6 +226,7 @@ typedef uint64_t bb__aliased_uint64_t FIX_ALIASING;
|
|||||||
# define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
|
# define move_to_unaligned32(u32p, v) (*(bb__aliased_uint32_t*)(u32p) = (v))
|
||||||
/* #elif ... - add your favorite arch today! */
|
/* #elif ... - add your favorite arch today! */
|
||||||
#else
|
#else
|
||||||
|
# define BB_UNALIGNED_MEMACCESS_OK 0
|
||||||
/* performs reasonably well (gcc usually inlines memcpy here) */
|
/* performs reasonably well (gcc usually inlines memcpy here) */
|
||||||
# define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
|
# define move_from_unaligned_int(v, intp) (memcpy(&(v), (intp), sizeof(int)))
|
||||||
# define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
|
# define move_from_unaligned_long(v, longp) (memcpy(&(v), (longp), sizeof(long)))
|
||||||
|
Loading…
Reference in New Issue
Block a user