sed: open input files sequentially to avoid EMFILE

Currently, sed pre-opens all files, which may cause EMFILE errors
on systems with low ulimit -n.  Change sed to open one file at a time.

function                                             old     new   delta
get_next_line                                        177     235     +58
sed_main                                             682     652     -30
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 1/1 up/down: 58/-30)             Total: 28 bytes

Based on the patch by Daniel Borca <dborca@yahoo.com>

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
This commit is contained in:
Denys Vlasenko 2013-11-28 03:14:16 +01:00
parent 8bd810bd27
commit 259b3c047a
2 changed files with 36 additions and 27 deletions

View File

@ -23,7 +23,7 @@
* resulting sed_cmd_t structures are appended to a linked list * resulting sed_cmd_t structures are appended to a linked list
* (G.sed_cmd_head/G.sed_cmd_tail). * (G.sed_cmd_head/G.sed_cmd_tail).
* *
* add_input_file() adds a FILE* to the list of input files. We need to * add_input_file() adds a char* to the list of input files. We need to
* know all input sources ahead of time to find the last line for the $ match. * know all input sources ahead of time to find the last line for the $ match.
* *
* process_files() does actual sedding, reading data lines from each input FILE* * process_files() does actual sedding, reading data lines from each input FILE*
@ -135,12 +135,15 @@ static const char semicolon_whitespace[] ALIGN1 = "; \n\r\t\v";
struct globals { struct globals {
/* options */ /* options */
int be_quiet, regex_type; int be_quiet, regex_type;
FILE *nonstdout; FILE *nonstdout;
char *outname, *hold_space; char *outname, *hold_space;
smallint exitcode;
/* List of input files */ /* list of input files */
int input_file_count, current_input_file; int input_file_count, current_input_file;
FILE **input_file_list; const char **input_file_list;
FILE *current_fp;
regmatch_t regmatch[10]; regmatch_t regmatch[10];
regex_t *previous_regex_ptr; regex_t *previous_regex_ptr;
@ -148,7 +151,7 @@ struct globals {
/* linked list of sed commands */ /* linked list of sed commands */
sed_cmd_t *sed_cmd_head, **sed_cmd_tail; sed_cmd_t *sed_cmd_head, **sed_cmd_tail;
/* Linked list of append lines */ /* linked list of append lines */
llist_t *append_head; llist_t *append_head;
char *add_cmd_line; char *add_cmd_line;
@ -200,8 +203,8 @@ static void sed_free_and_close_stuff(void)
free(G.hold_space); free(G.hold_space);
while (G.current_input_file < G.input_file_count) if (G.current_fp)
fclose(G.input_file_list[G.current_input_file++]); fclose(G.current_fp);
} }
#else #else
void sed_free_and_close_stuff(void); void sed_free_and_close_stuff(void);
@ -939,8 +942,20 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
/* will be returned if last line in the file /* will be returned if last line in the file
* doesn't end with either '\n' or '\0' */ * doesn't end with either '\n' or '\0' */
gc = NO_EOL_CHAR; gc = NO_EOL_CHAR;
while (G.current_input_file < G.input_file_count) { for (; G.input_file_list[G.current_input_file]; G.current_input_file++) {
FILE *fp = G.input_file_list[G.current_input_file]; FILE *fp = G.current_fp;
if (!fp) {
const char *path = G.input_file_list[G.current_input_file];
fp = stdin;
if (path != bb_msg_standard_input) {
fp = fopen_or_warn(path, "r");
if (!fp) {
G.exitcode = EXIT_FAILURE;
continue;
}
}
G.current_fp = fp;
}
/* Read line up to a newline or NUL byte, inclusive, /* Read line up to a newline or NUL byte, inclusive,
* return malloc'ed char[]. length of the chunk read * return malloc'ed char[]. length of the chunk read
* is stored in len. NULL if EOF/error */ * is stored in len. NULL if EOF/error */
@ -971,8 +986,8 @@ static char *get_next_line(char *gets_char, char *last_puts_char, char last_gets
* (note: *no* newline after "b bang"!) */ * (note: *no* newline after "b bang"!) */
} }
/* Close this file and advance to next one */ /* Close this file and advance to next one */
fclose(fp); fclose_if_not_stdin(fp);
G.current_input_file++; G.current_fp = NULL;
} }
*gets_char = gc; *gets_char = gc;
return temp; return temp;
@ -1399,7 +1414,7 @@ static void add_cmd_block(char *cmdstr)
free(sv); free(sv);
} }
static void add_input_file(FILE *file) static void add_input_file(const char *file)
{ {
G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count); G.input_file_list = xrealloc_vector(G.input_file_list, 2, G.input_file_count);
G.input_file_list[G.input_file_count++] = file; G.input_file_list[G.input_file_count++] = file;
@ -1423,8 +1438,6 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
"file\0" Required_argument "f"; "file\0" Required_argument "f";
#endif #endif
int status = EXIT_SUCCESS;
INIT_G(); INIT_G();
/* destroy command strings on exit */ /* destroy command strings on exit */
@ -1491,27 +1504,21 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
if (argv[0] == NULL) { if (argv[0] == NULL) {
if (opt & OPT_in_place) if (opt & OPT_in_place)
bb_error_msg_and_die(bb_msg_requires_arg, "-i"); bb_error_msg_and_die(bb_msg_requires_arg, "-i");
add_input_file(stdin); add_input_file(bb_msg_standard_input);
} else { } else {
int i; int i;
for (i = 0; argv[i]; i++) { for (i = 0; argv[i]; i++) {
struct stat statbuf; struct stat statbuf;
int nonstdoutfd; int nonstdoutfd;
FILE *file;
sed_cmd_t *sed_cmd; sed_cmd_t *sed_cmd;
if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) { if (LONE_DASH(argv[i]) && !(opt & OPT_in_place)) {
add_input_file(stdin); add_input_file(bb_msg_standard_input);
process_files(); process_files();
continue; continue;
} }
file = fopen_or_warn(argv[i], "r"); add_input_file(argv[i]);
if (!file) {
status = EXIT_FAILURE;
continue;
}
add_input_file(file);
if (!(opt & OPT_in_place)) { if (!(opt & OPT_in_place)) {
continue; continue;
} }
@ -1523,7 +1530,7 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
G.nonstdout = xfdopen_for_write(nonstdoutfd); G.nonstdout = xfdopen_for_write(nonstdoutfd);
/* Set permissions/owner of output file */ /* Set permissions/owner of output file */
fstat(fileno(file), &statbuf); stat(argv[i], &statbuf);
/* chmod'ing AFTER chown would preserve suid/sgid bits, /* chmod'ing AFTER chown would preserve suid/sgid bits,
* but GNU sed 4.2.1 does not preserve them either */ * but GNU sed 4.2.1 does not preserve them either */
fchmod(nonstdoutfd, statbuf.st_mode); fchmod(nonstdoutfd, statbuf.st_mode);
@ -1549,12 +1556,13 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
} }
} }
/* Here, to handle "sed 'cmds' nonexistent_file" case we did: /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
* if (G.current_input_file >= G.input_file_count) * if (G.input_file_list[G.current_input_file] == NULL)
* return status; * return G.exitcode;
* but it's not needed since process_files() works correctly * but it's not needed since process_files() works correctly
* in this case too. */ * in this case too. */
} }
process_files(); process_files();
return status; return G.exitcode;
} }

View File

@ -18,7 +18,8 @@ int FAST_FUNC fclose_if_not_stdin(FILE *f)
{ {
/* Some more paranoid applets want ferror() check too */ /* Some more paranoid applets want ferror() check too */
int r = ferror(f); /* NB: does NOT set errno! */ int r = ferror(f); /* NB: does NOT set errno! */
if (r) errno = EIO; /* so we'll help it */ if (r)
errno = EIO; /* so we'll help it */
if (f != stdin) if (f != stdin)
return (r | fclose(f)); /* fclose does set errno on error */ return (r | fclose(f)); /* fclose does set errno on error */
return r; return r;