From 75605788ff6be5a766a7e41da583d5e8f47d9ac4 Mon Sep 17 00:00:00 2001 From: Denis Vlasenko Date: Wed, 14 Mar 2007 00:07:51 +0000 Subject: [PATCH] gzip: use common bbunzip infrastructure - ~700 bytes code less --- archival/Kbuild | 2 +- archival/bbunzip.c | 23 +++--- archival/gzip.c | 171 +++++++++++++++++++-------------------- docs/keep_data_small.txt | 88 ++++++++++++++++++++ 4 files changed, 185 insertions(+), 99 deletions(-) create mode 100644 docs/keep_data_small.txt diff --git a/archival/Kbuild b/archival/Kbuild index 011feee5f..07b442f15 100644 --- a/archival/Kbuild +++ b/archival/Kbuild @@ -14,7 +14,7 @@ lib-$(CONFIG_CPIO) += cpio.o lib-$(CONFIG_DPKG) += dpkg.o lib-$(CONFIG_DPKG_DEB) += dpkg_deb.o lib-$(CONFIG_GUNZIP) += bbunzip.o -lib-$(CONFIG_GZIP) += gzip.o +lib-$(CONFIG_GZIP) += gzip.o bbunzip.o lib-$(CONFIG_RPM2CPIO) += rpm2cpio.o lib-$(CONFIG_RPM) += rpm.o lib-$(CONFIG_TAR) += tar.o diff --git a/archival/bbunzip.c b/archival/bbunzip.c index f7c861256..e16e6b083 100644 --- a/archival/bbunzip.c +++ b/archival/bbunzip.c @@ -9,12 +9,12 @@ #include "unarchive.h" enum { - OPT_STDOUT = 1, - OPT_FORCE = 2, + OPT_STDOUT = 0x1, + OPT_FORCE = 0x2, /* gunzip only: */ - OPT_TEST = 4, - OPT_DECOMPRESS = 8, - OPT_VERBOSE = 0x10, + OPT_VERBOSE = 0x4, + OPT_DECOMPRESS = 0x8, + OPT_TEST = 0x10, }; static @@ -33,8 +33,7 @@ int open_to_or_warn(int to_fd, const char *filename, int flags, int mode) return 0; } -static -int unpack(char **argv, +int bbunpack(char **argv, char* (*make_new_name)(char *filename), USE_DESKTOP(long long) int (*unpacker)(void) ) @@ -173,7 +172,7 @@ int bunzip2_main(int argc, char **argv) if (applet_name[2] == 'c') option_mask32 |= OPT_STDOUT; - return unpack(argv, make_new_name_bunzip2, unpack_bunzip2); + return bbunpack(argv, make_new_name_bunzip2, unpack_bunzip2); } #endif @@ -267,13 +266,13 @@ USE_DESKTOP(long long) int unpack_gunzip(void) int gunzip_main(int argc, char **argv); int gunzip_main(int argc, char **argv) { - getopt32(argc, argv, 
"cftdv"); + getopt32(argc, argv, "cfvdt"); argv += optind; /* if called as zcat */ if (applet_name[1] == 'c') option_mask32 |= OPT_STDOUT; - return unpack(argv, make_new_name_gunzip, unpack_gunzip); + return bbunpack(argv, make_new_name_gunzip, unpack_gunzip); } #endif @@ -311,7 +310,7 @@ int unlzma_main(int argc, char **argv) if (applet_name[4] == 'c') option_mask32 |= OPT_STDOUT; - return unpack(argv, make_new_name_unlzma, unpack_unlzma); + return bbunpack(argv, make_new_name_unlzma, unpack_unlzma); } #endif @@ -350,7 +349,7 @@ int uncompress_main(int argc, char **argv) getopt32(argc, argv, "cf"); argv += optind; - return unpack(argv, make_new_name_uncompress, unpack_uncompress); + return bbunpack(argv, make_new_name_uncompress, unpack_uncompress); } #endif diff --git a/archival/gzip.c b/archival/gzip.c index c8444ac25..90075272d 100644 --- a/archival/gzip.c +++ b/archival/gzip.c @@ -2074,6 +2074,7 @@ static void zip(int in, int out) /* ======================================================================== */ +#if 0 static void abort_gzip(int ATTRIBUTE_UNUSED ignored) { exit(1); @@ -2082,92 +2083,6 @@ static void abort_gzip(int ATTRIBUTE_UNUSED ignored) int gzip_main(int argc, char **argv); int gzip_main(int argc, char **argv) { - enum { - OPT_tostdout = 0x1, - OPT_force = 0x2, - }; - - unsigned opt; - int inFileNum; - int outFileNum; - int i; - struct stat statBuf; - - opt = getopt32(argc, argv, "cf123456789qv" USE_GUNZIP("d")); - //if (opt & 0x1) // -c - //if (opt & 0x2) // -f - /* Ignore 1-9 (compression level) options */ - //if (opt & 0x4) // -1 - //if (opt & 0x8) // -2 - //if (opt & 0x10) // -3 - //if (opt & 0x20) // -4 - //if (opt & 0x40) // -5 - //if (opt & 0x80) // -6 - //if (opt & 0x100) // -7 - //if (opt & 0x200) // -8 - //if (opt & 0x400) // -9 - //if (opt & 0x800) // -q - //if (opt & 0x1000) // -v -#if ENABLE_GUNZIP /* gunzip_main may not be visible... 
*/ - if (opt & 0x2000) { // -d - /* FIXME: getopt32 should not depend on optind */ - optind = 1; - return gunzip_main(argc, argv); - } -#endif - - /* Comment?? */ - if (signal(SIGINT, SIG_IGN) != SIG_IGN) { - signal(SIGINT, abort_gzip); - } -#ifdef SIGTERM - if (signal(SIGTERM, SIG_IGN) != SIG_IGN) { - signal(SIGTERM, abort_gzip); - } -#endif -#ifdef SIGHUP - if (signal(SIGHUP, SIG_IGN) != SIG_IGN) { - signal(SIGHUP, abort_gzip); - } -#endif - - ptr_to_globals = xzalloc(sizeof(struct global1) + sizeof(struct global2)); - ptr_to_globals++; - G2.l_desc.dyn_tree = G2.dyn_ltree; - G2.l_desc.static_tree = G2.static_ltree; - G2.l_desc.extra_bits = extra_lbits; - G2.l_desc.extra_base = LITERALS + 1; - G2.l_desc.elems = L_CODES; - G2.l_desc.max_length = MAX_BITS; - //G2.l_desc.max_code = 0; - - G2.d_desc.dyn_tree = G2.dyn_dtree; - G2.d_desc.static_tree = G2.static_dtree; - G2.d_desc.extra_bits = extra_dbits; - //G2.d_desc.extra_base = 0; - G2.d_desc.elems = D_CODES; - G2.d_desc.max_length = MAX_BITS; - //G2.d_desc.max_code = 0; - - G2.bl_desc.dyn_tree = G2.bl_tree; - //G2.bl_desc.static_tree = NULL; - G2.bl_desc.extra_bits = extra_blbits, - //G2.bl_desc.extra_base = 0; - G2.bl_desc.elems = BL_CODES; - G2.bl_desc.max_length = MAX_BL_BITS; - //G2.bl_desc.max_code = 0; - - /* Allocate all global buffers (for DYN_ALLOC option) */ - ALLOC(uch, G1.l_buf, INBUFSIZ); - ALLOC(uch, G1.outbuf, OUTBUFSIZ); - ALLOC(ush, G1.d_buf, DIST_BUFSIZE); - ALLOC(uch, G1.window, 2L * WSIZE); - ALLOC(ush, G1.prev, 1L << BITS); - - /* Initialise the CRC32 table */ - G1.crc_32_tab = crc32_filltable(0); - - clear_bufs(); if (optind == argc) { G1.time_stamp = 0; @@ -2240,3 +2155,87 @@ int gzip_main(int argc, char **argv) return 0; //##G1.exit_code; } +#endif + +int bbunpack(char **argv, + char* (*make_new_name)(char *filename), + USE_DESKTOP(long long) int (*unpacker)(void) +); + +static +char* make_new_name_gzip(char *filename) +{ + return xasprintf("%s.gz", filename); +} + +static +USE_DESKTOP(long 
long) int pack_gzip(void) +{ + struct stat s; + + G1.time_stamp = 0; + if (!fstat(STDIN_FILENO, &s)) + G1.time_stamp = s.st_ctime; + zip(STDIN_FILENO, STDOUT_FILENO); + return 0; +} + +int gzip_main(int argc, char **argv); +int gzip_main(int argc, char **argv) +{ + unsigned opt; + + /* Must match bbunzip's constants OPT_STDOUT, OPT_FORCE! */ + opt = getopt32(argc, argv, "cfv" USE_GUNZIP("d") "q123456789" ); + option_mask32 &= 0x7; /* Clear -d, ignore -q, -0..9 */ + //if (opt & 0x1) // -c + //if (opt & 0x2) // -f + //if (opt & 0x4) // -v +#if ENABLE_GUNZIP /* gunzip_main may not be visible... */ + if (opt & 0x8) { // -d + /* FIXME: getopt32 should not depend on optind */ + optind = 1; + return gunzip_main(argc, argv); + } +#endif + + ptr_to_globals = xzalloc(sizeof(struct global1) + sizeof(struct global2)); + ptr_to_globals++; + G2.l_desc.dyn_tree = G2.dyn_ltree; + G2.l_desc.static_tree = G2.static_ltree; + G2.l_desc.extra_bits = extra_lbits; + G2.l_desc.extra_base = LITERALS + 1; + G2.l_desc.elems = L_CODES; + G2.l_desc.max_length = MAX_BITS; + //G2.l_desc.max_code = 0; + + G2.d_desc.dyn_tree = G2.dyn_dtree; + G2.d_desc.static_tree = G2.static_dtree; + G2.d_desc.extra_bits = extra_dbits; + //G2.d_desc.extra_base = 0; + G2.d_desc.elems = D_CODES; + G2.d_desc.max_length = MAX_BITS; + //G2.d_desc.max_code = 0; + + G2.bl_desc.dyn_tree = G2.bl_tree; + //G2.bl_desc.static_tree = NULL; + G2.bl_desc.extra_bits = extra_blbits, + //G2.bl_desc.extra_base = 0; + G2.bl_desc.elems = BL_CODES; + G2.bl_desc.max_length = MAX_BL_BITS; + //G2.bl_desc.max_code = 0; + + /* Allocate all global buffers (for DYN_ALLOC option) */ + ALLOC(uch, G1.l_buf, INBUFSIZ); + ALLOC(uch, G1.outbuf, OUTBUFSIZ); + ALLOC(ush, G1.d_buf, DIST_BUFSIZE); + ALLOC(uch, G1.window, 2L * WSIZE); + ALLOC(ush, G1.prev, 1L << BITS); + + /* Initialise the CRC32 table */ + G1.crc_32_tab = crc32_filltable(0); + + clear_bufs(); + + return bbunpack(argv, make_new_name_gzip, pack_gzip); +} diff --git 
a/docs/keep_data_small.txt b/docs/keep_data_small.txt new file mode 100644 index 000000000..88cc2bc66 --- /dev/null +++ b/docs/keep_data_small.txt @@ -0,0 +1,88 @@ + Keeping data small + +When many applets are compiled into busybox, all rw data and +bss for each applet are concatenated. Including those from libc, +if static bbox is built. When bbox is started, _all_ this data +is allocated, not just that one part for selected applet. + +What "allocated" exactly means, depends on arch. +On nommu it probably bites the most, actually using real +RAM for rwdata and bss. On i386, bss is lazily allocated +by COWed zero pages. Not sure about rwdata - also COW? + +Small experiment measures "parasitic" bbox memory consumption. +Here we start 1000 "busybox sleep 10" in parallel. +bbox binary is practically allyesconfig static one, +built against uclibc: + +bash-3.2# nmeter '%t %c %b %m %p %[pn]' +23:17:28 .......... 0 0 168M 0 147 +23:17:29 .......... 0 0 168M 0 147 +23:17:30 U......... 0 0 168M 1 147 +23:17:31 SU........ 0 188k 181M 244 391 +23:17:32 SSSSUUU... 0 0 223M 757 1147 +23:17:33 UUU....... 0 0 223M 0 1147 +23:17:34 U......... 0 0 223M 1 1147 +23:17:35 .......... 0 0 223M 0 1147 +23:17:36 .......... 0 0 223M 0 1147 +23:17:37 S......... 0 0 223M 0 1147 +23:17:38 .......... 0 0 223M 1 1147 +23:17:39 .......... 0 0 223M 0 1147 +23:17:40 .......... 0 0 223M 0 1147 +23:17:41 .......... 0 0 210M 0 906 +23:17:42 .......... 0 0 168M 1 147 +23:17:43 .......... 0 0 168M 0 147 + +This requires 55M of memory. Thus 1 trivial busybox applet +takes 55k of userspace memory (nmeter doesn't account for kernel-side +allocations). Definitely can be improved. + +Thus we should avoid large global data in our applets, +and should minimize usage of libc functions which implicitly use +such structures in libc.
+ + Example 1 + +One example of how to reduce global data usage is in +archival/libunarchive/decompress_unzip.c: + +/* This is somewhat complex-looking arrangement, but it allows + * to place decompressor state either in bss or in + * malloc'ed space simply by changing #defines below. + * Sizes on i386: + * text data bss dec hex + * 5256 0 108 5364 14f4 - bss + * 4915 0 0 4915 1333 - malloc + */ +#define STATE_IN_BSS 0 +#define STATE_IN_MALLOC 1 + +This example completely eliminates globals in that module. +Required memory is allocated in inflate_gunzip() [its main module] +and then passed down to all subroutines which need to access globals +as a parameter. + + Example 2 + +In case you don't want to pass this additional parameter everywhere, +take a look at archival/gzip.c. Here all global data is replaced by +a single global pointer (ptr_to_globals) to allocated storage. + +In order to not duplicate ptr_to_globals in every applet, you can +reuse single common one. It is defined in libbb/messages.c +as void *ptr_to_globals, but is NOT declared in libbb.h. +You first define a struct: + +struct my_globals { int a; char buf[1000]; }; + +and then declare that ptr_to_globals is a pointer to it: + +extern struct my_globals *ptr_to_globals; +#define G (*ptr_to_globals) + +Linker magic ensures that these two merge into single pointer object. +Now initialize it in _main(): + + ptr_to_globals = xzalloc(sizeof(G)); + +and you can reference "globals" by G.a, G.buf and so on, in any function.