From 7a4021debaa1f89c0ee2ad41f446a8234f8261c7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Mon, 14 Jun 2010 00:57:05 +0200 Subject: [PATCH] xargs: simplify logic Removed double-buffering in a linked list. function old new delta store_param - 56 +56 xargs_main 871 840 -31 process_stdin 458 396 -62 process0_stdin 267 143 -124 ------------------------------------------------------------------------------ (add/remove: 1/0 grow/shrink: 0/3 up/down: 56/-217) Total: -161 bytes Signed-off-by: Denys Vlasenko --- findutils/xargs.c | 381 ++++++++++++++++++++++------------------------ 1 file changed, 183 insertions(+), 198 deletions(-) diff --git a/findutils/xargs.c b/findutils/xargs.c index 9988e3dd5..857773df9 100644 --- a/findutils/xargs.c +++ b/findutils/xargs.c @@ -59,10 +59,6 @@ //config: are not special. #include "libbb.h" - -/* This is a NOEXEC applet. Be very careful! */ - - /* COMPAT: SYSV version defaults size (and has a max value of) to 470. We try to make it as large as possible. */ #if !defined(ARG_MAX) && defined(_SC_ARG_MAX) @@ -72,6 +68,12 @@ # define ARG_MAX 470 #endif +/* This is a NOEXEC applet. Be very careful! */ + + +//#define dbg_msg(...) bb_error_msg(__VA_ARGS__) +#define dbg_msg(...) ((void)0) + #ifdef TEST # ifndef ENABLE_FEATURE_XARGS_SUPPORT_CONFIRMATION @@ -88,26 +90,36 @@ # endif #endif + +struct globals { + char **args; + const char *eof_str; + int idx; +} FIX_ALIASING; +#define G (*(struct globals*)&bb_common_bufsiz1) +#define INIT_G() do { } while (0) + + /* * This function has special algorithm. * Don't use fork and include to main! */ -static int xargs_exec(char **args) +static int xargs_exec(void) { int status; - status = spawn_and_wait(args); + status = spawn_and_wait(G.args); if (status < 0) { - bb_simple_perror_msg(args[0]); + bb_simple_perror_msg(G.args[0]); return errno == ENOENT ? 127 : 126; } if (status == 255) { - bb_error_msg("%s: exited with status 255; aborting", args[0]); + bb_error_msg("%s: exited with status 255; aborting", G.args[0]); return 124; } if (status >= 0x180) { bb_error_msg("%s: terminated by signal %d", - args[0], status - 0x180); + G.args[0], status - 0x180); return 125; } if (status) @@ -115,51 +127,59 @@ static int xargs_exec(char **args) return 0; } - -typedef struct xlist_t { - struct xlist_t *link; - size_t length; /* length of xstr[] including NUL */ - char xstr[1]; -} xlist_t; - /* In POSIX/C locale isspace is only these chars: "\t\n\v\f\r" and space. * "\t\n\v\f\r" happen to have ASCII codes 9,10,11,12,13. */ #define ISSPACE(a) ({ unsigned char xargs__isspace = (a) - 9; xargs__isspace == (' ' - 9) || xargs__isspace <= (13 - 9); }) +static void store_param(char *s) +{ + /* Grow by 256 elements at once */ + if (!(G.idx & 0xff)) { /* G.idx == N*256 */ + /* Enlarge, make G.args[(N+1)*256 - 1] last valid idx */ + G.args = xrealloc(G.args, sizeof(G.args[0]) * (G.idx + 0x100)); + } + G.args[G.idx++] = s; +} + +/* process[0]_stdin: + * Read characters into buf[n_max_chars+1], and when parameter delimiter + * is seen, store the address of a new parameter to args[]. + * If reading discovers that last chars do not form the complete + * parameter, the pointer to the first such "tail character" is returned. + * (buf has extra byte at the end to accomodate terminating NUL + * of "tail characters" string). + * Otherwise, the returned pointer points to NUL byte. + * The args[] vector is NULL-terminated. + * On entry, buf[] may contain some "seed chars" which are to become + * the beginning of the first parameter. + */ + #if ENABLE_FEATURE_XARGS_SUPPORT_QUOTES -static xlist_t* process_stdin(xlist_t *list_arg, - const char *eof_str, size_t n_max_chars, char *buf) +static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf) { #define NORM 0 #define QUOTE 1 #define BACKSLASH 2 #define SPACE 4 - char *s = NULL; /* start of the word */ - char *p = NULL; /* pointer to end of the word */ + char *s; /* start of the word */ + char *p; /* pointer to end of the word */ char q = '\0'; /* quote char */ char state = NORM; - char eof_str_detected = 0; - size_t line_l = 0; /* size of loaded args */ - xlist_t *cur; - xlist_t *prev; - prev = cur = list_arg; - while (cur) { - prev = cur; - line_l += cur->length; - cur = cur->link; - } + s = buf; + p = s + strlen(buf); + + /* "goto ret" is used instead of "break" to make control flow + * more obvious: */ while (1) { int c = getchar(); if (c == EOF) { - if (s) - goto unexpected_eof; - break; + if (p != s) + goto close_word; + goto ret; } - if (eof_str_detected) /* skip till EOF */ - continue; if (state == BACKSLASH) { state = NORM; goto set; @@ -171,15 +191,13 @@ static xlist_t* process_stdin(xlist_t *list_arg, state = NORM; } else { /* if (state == NORM) */ if (ISSPACE(c)) { - if (s) { - unexpected_eof: + if (p != s) { + close_word: state = SPACE; c = '\0'; goto set; } } else { - if (s == NULL) - s = p = buf; if (c == '\\') { state = BACKSLASH; } else if (c == '\'' || c == '"') { @@ -187,8 +205,6 @@ static xlist_t* process_stdin(xlist_t *list_arg, state = QUOTE; } else { set: - if ((size_t)(p - buf) >= n_max_chars) - bb_error_msg_and_die("argument line too long"); *p++ = c; } } @@ -199,149 +215,128 @@ static xlist_t* process_stdin(xlist_t *list_arg, q == '\'' ? "single" : "double"); } /* A full word is loaded */ - if (eof_str) { - eof_str_detected = (strcmp(s, eof_str) == 0); - } - if (!eof_str_detected) { - size_t length = (p - buf); - /* Dont xzalloc - it can be quite big */ - cur = xmalloc(offsetof(xlist_t, xstr) + length); - cur->link = NULL; - cur->length = length; - memcpy(cur->xstr, s, length); - if (prev == NULL) { - list_arg = cur; - } else { - prev->link = cur; + if (G.eof_str) { + if (strcmp(s, G.eof_str) == 0) { + while (getchar() != EOF) + continue; + p = s; + goto ret; } - prev = cur; - line_l += length; - if (line_l >= n_max_chars) /* limit memory usage */ - break; } - s = NULL; + n_max_chars -= (p - s); + /* if (n_max_chars < 0) impossible */ + store_param(s); + dbg_msg("args[]:'%s'", s); + s = p; + n_max_arg--; + if (n_max_arg == 0 || n_max_chars == 0) { + goto ret; + } state = NORM; + } else /* state != SPACE */ + if (p - s >= n_max_chars) { + dbg_msg("s:'%s' p-s:%d n_max_chars:%d", s, (int)(p-s), n_max_chars); + goto ret; } } - return list_arg; + ret: + *p = '\0'; + store_param(NULL); + dbg_msg("return:'%s'", s); + return s; } #else /* The variant does not support single quotes, double quotes or backslash */ -static xlist_t* process_stdin(xlist_t *list_arg, - const char *eof_str, size_t n_max_chars, char *buf) +static char* FAST_FUNC process_stdin(int n_max_chars, int n_max_arg, char *buf) { - char eof_str_detected = 0; - char *s = NULL; /* start of the word */ - char *p = NULL; /* pointer to end of the word */ - size_t line_l = 0; /* size of loaded args */ - xlist_t *cur; - xlist_t *prev; + char *s; /* start of the word */ + char *p; /* pointer to end of the word */ - prev = cur = list_arg; - while (cur) { - prev = cur; - line_l += cur->length; - cur = cur->link; - } + s = buf; + p = s + strlen(buf); while (1) { int c = getchar(); if (c == EOF) { - if (s == NULL) - break; - } - if (eof_str_detected) { /* skip till EOF */ - continue; + if (p == s) + goto ret; } if (c == EOF || ISSPACE(c)) { - if (s == NULL) + if (p == s) continue; c = EOF; } - if (s == NULL) - s = p = buf; - if ((size_t)(p - buf) >= n_max_chars) - bb_error_msg_and_die("argument line too long"); *p++ = (c == EOF ? '\0' : c); if (c == EOF) { /* word's delimiter or EOF detected */ /* A full word is loaded */ - if (eof_str) { - eof_str_detected = (strcmp(s, eof_str) == 0); - } - if (!eof_str_detected) { - size_t length = (p - buf); - /* Dont xzalloc - it can be quite big */ - cur = xmalloc(offsetof(xlist_t, xstr) + length); - cur->link = NULL; - cur->length = length; - memcpy(cur->xstr, s, length); - if (prev == NULL) { - list_arg = cur; - } else { - prev->link = cur; + if (G.eof_str) { + if (strcmp(s, G.eof_str) == 0) { + while (getchar() != EOF) + continue; + p = s; + goto ret; } - prev = cur; - line_l += length; - if (line_l >= n_max_chars) /* limit memory usage */ - break; } - s = NULL; + n_max_chars -= (p - s); + /* if (n_max_chars < 0) impossible */ + store_param(s); + dbg_msg("args[]:'%s'", s); + s = p; + n_max_arg--; + if (n_max_arg == 0 || n_max_chars == 0) { + goto ret; + } + } else /* c != EOF */ + if (p - s >= n_max_chars) { + goto ret; } } - return list_arg; + ret: + *p = '\0'; + store_param(NULL); + dbg_msg("return:'%s'", s); + return s; } #endif /* FEATURE_XARGS_SUPPORT_QUOTES */ #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM -static xlist_t* process0_stdin(xlist_t *list_arg, - const char *eof_str UNUSED_PARAM, size_t n_max_chars, char *buf) +static char* FAST_FUNC process0_stdin(int n_max_chars, int n_max_arg, char *buf) { - char *s = NULL; /* start of the word */ - char *p = NULL; /* pointer to end of the word */ - size_t line_l = 0; /* size of loaded args */ - xlist_t *cur; - xlist_t *prev; + char *s; /* start of the word */ + char *p; /* pointer to end of the word */ - prev = cur = list_arg; - while (cur) { - prev = cur; - line_l += cur->length; - cur = cur->link; - } + s = buf; + p = s + strlen(buf); while (1) { int c = getchar(); if (c == EOF) { - if (s == NULL) - break; + if (p == s) + goto ret; c = '\0'; } - if (s == NULL) - s = p = buf; - if ((size_t)(p - buf) >= n_max_chars) - bb_error_msg_and_die("argument line too long"); *p++ = c; if (c == '\0') { /* word's delimiter or EOF detected */ /* A full word is loaded */ - size_t length = (p - buf); - /* Dont xzalloc - it can be quite big */ - cur = xmalloc(offsetof(xlist_t, xstr) + length); - cur->link = NULL; - cur->length = length; - memcpy(cur->xstr, s, length); - if (prev == NULL) { - list_arg = cur; - } else { - prev->link = cur; + n_max_chars -= (p - s); + /* if (n_max_chars < 0) impossible */ + store_param(s); + dbg_msg("args[]:'%s'", s); + n_max_arg--; + s = p; + if (n_max_arg == 0 || n_max_chars == 0) { + goto ret; } - prev = cur; - line_l += length; - if (line_l >= n_max_chars) /* limit memory usage */ - break; - s = NULL; + } else /* c != '\0' */ + if (p - s >= n_max_chars) { + goto ret; } } - return list_arg; + ret: + *p = '\0'; + store_param(NULL); + dbg_msg("return:'%s'", s); + return s; } #endif /* FEATURE_XARGS_SUPPORT_ZERO_TERM */ @@ -397,28 +392,30 @@ enum { int xargs_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int xargs_main(int argc, char **argv) { - xlist_t *list = NULL; + int i; int child_error = 0; char *max_args; char *max_chars; char *buf; - int n_max_arg; - const char *eof_str = NULL; unsigned opt; - size_t n_max_chars; + int n_max_chars; + int n_max_arg; #if ENABLE_FEATURE_XARGS_SUPPORT_ZERO_TERM - xlist_t* (*read_args)(xlist_t*, const char*, size_t, char*) = process_stdin; + char* FAST_FUNC (*read_args)(int, int, char*) = process_stdin; #else #define read_args process_stdin #endif - opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &eof_str, &eof_str); + INIT_G(); + + G.eof_str = NULL; + opt = getopt32(argv, OPTION_STR, &max_args, &max_chars, &G.eof_str, &G.eof_str); /* -E ""? You may wonder why not just omit -E? * This is used for portability: * old xargs was using "_" as default for -E / -e */ - if ((opt & OPT_EOF_STRING1) && eof_str[0] == '\0') - eof_str = NULL; + if ((opt & OPT_EOF_STRING1) && G.eof_str[0] == '\0') + G.eof_str = NULL; if (opt & OPT_ZEROTERM) IF_FEATURE_XARGS_SUPPORT_ZERO_TERM(read_args = process0_stdin); @@ -451,93 +448,81 @@ int xargs_main(int argc, char **argv) n_max_chars = 20 * 1024; if (opt & OPT_UPTO_SIZE) { - int i; size_t n_chars = 0; - n_max_chars = xatoul_range(max_chars, 1, INT_MAX); + n_max_chars = xatou_range(max_chars, 1, INT_MAX); for (i = 0; argv[i]; i++) { n_chars += strlen(argv[i]) + 1; } n_max_chars -= n_chars; - if ((ssize_t)n_max_chars <= 0) { + if (n_max_chars <= 0) { bb_error_msg_and_die("can't fit single argument within argument list size limit"); } } - buf = xmalloc(n_max_chars); + buf = xzalloc(n_max_chars + 1); if (opt & OPT_UPTO_NUMBER) { - n_max_arg = xatoul_range(max_args, 1, INT_MAX); + n_max_arg = xatou_range(max_args, 1, INT_MAX); if (n_max_arg < n_max_chars) goto skip; } n_max_arg = n_max_chars; skip: - while ((list = read_args(list, eof_str, n_max_chars, buf)) != NULL - || !(opt & OPT_NO_EMPTY) - ) { - char **args; - xlist_t *cur; - int i, n; - size_t n_chars; + /* Allocate pointers for execvp */ + /* We can statically allocate (argc + n_max_arg + 1) elements + * and do not bother with resizing args[], but on 64-bit machines + * this results in args[] vector which is ~8 times bigger + * than n_max_chars! That is, with n_max_chars == 20k, + * args[] will take 160k (!), which will most likely be + * almost entirely unused. + */ + /* See store_param() for matching 256-step growth logic */ + G.args = xmalloc(sizeof(G.args[0]) * ((argc + 0xff) & ~0xff)); + /* Store the command to be executed, part 1 */ + for (i = 0; argv[i]; i++) + G.args[i] = argv[i]; + + while (1) { + char *rem; + + G.idx = argc; + rem = read_args(n_max_chars, n_max_arg, buf); + + if (!G.args[argc]) { + if (*rem != '\0') + bb_error_msg_and_die("argument line too long"); + if (opt & OPT_NO_EMPTY) + break; + } opt |= OPT_NO_EMPTY; - /* take args from list, not exceeding arg and char limits */ - n_chars = 0; - n = 0; - for (cur = list; cur; cur = cur->link) { - n_chars += cur->length; - if (n_chars > n_max_chars || n >= n_max_arg) { - if (opt & OPT_TERMINATE) - bb_error_msg_and_die("argument list too long"); - break; - } - n++; - } - - /* allocate pointers for execvp */ - args = xzalloc(sizeof(args[0]) * (argc + n + 1)); - - /* store the command to be executed - * (taken from the command line) */ - for (i = 0; argv[i]; i++) - args[i] = argv[i]; - /* (taken from stdin) */ - for (cur = list; n; cur = cur->link) { - args[i++] = cur->xstr; - n--; - } - if (opt & (OPT_INTERACTIVE | OPT_VERBOSE)) { - for (i = 0; args[i]; i++) { + for (i = 0; G.args[i]; i++) { if (i) bb_putchar_stderr(' '); - fputs(args[i], stderr); + fputs(G.args[i], stderr); } if (!(opt & OPT_INTERACTIVE)) bb_putchar_stderr('\n'); } if (!(opt & OPT_INTERACTIVE) || xargs_ask_confirmation()) { - child_error = xargs_exec(args); + child_error = xargs_exec(); } - /* remove list elements which we consumed */ - for (i = argc; args[i]; i++) { - cur = list; - list = list->link; - free(cur); - } - free(args); - if (child_error > 0 && child_error != 123) { break; } + + overlapping_strcpy(buf, rem); } /* while */ - if (ENABLE_FEATURE_CLEAN_UP) + if (ENABLE_FEATURE_CLEAN_UP) { + free(G.args); free(buf); + } return child_error; }