From 52d83708364f85463fbc3756420b4068df13aab7 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Tue, 3 May 2011 00:51:43 +0200 Subject: [PATCH] sed: code shrink text data bss dec hex filename 876354 493 7584 884431 d7ecf busybox_old 876323 493 7584 884400 d7eb0 busybox_unstripped Signed-off-by: Denys Vlasenko --- editors/sed.c | 146 +++++++++++++++++++++++++++++++------------------- 1 file changed, 92 insertions(+), 54 deletions(-) diff --git a/editors/sed.c b/editors/sed.c index 9ab758bd7..9e27e3e18 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -14,49 +14,47 @@ */ /* Code overview. + * + * Files are laid out to avoid unnecessary function declarations. So for + * example, every function add_cmd calls occurs before add_cmd in this file. + * + * add_cmd() is called on each line of sed command text (from a file or from + * the command line). It calls get_address() and parse_cmd_args(). The + * resulting sed_cmd_t structures are appended to a linked list + * (G.sed_cmd_head/G.sed_cmd_tail). + * + * add_input_file() adds a FILE* to the list of input files. We need to + * know all input sources ahead of time to find the last line for the $ match. + * + * process_files() does actual sedding, reading data lines from each input FILE * + * (which could be stdin) and applying the sed command list (sed_cmd_head) to + * each of the resulting lines. + * + * sed_main() is where external code calls into this, with a command line. + */ - Files are laid out to avoid unnecessary function declarations. So for - example, every function add_cmd calls occurs before add_cmd in this file. - - add_cmd() is called on each line of sed command text (from a file or from - the command line). It calls get_address() and parse_cmd_args(). The - resulting sed_cmd_t structures are appended to a linked list - (G.sed_cmd_head/G.sed_cmd_tail). - - add_input_file() adds a FILE* to the list of input files. We need to - know all input sources ahead of time to find the last line for the $ match. 
- - process_files() does actual sedding, reading data lines from each input FILE * - (which could be stdin) and applying the sed command list (sed_cmd_head) to - each of the resulting lines. - - sed_main() is where external code calls into this, with a command line. -*/ - - -/* - Supported features and commands in this version of sed: - - - comments ('#') - - address matching: num|/matchstr/[,num|/matchstr/|$]command - - commands: (p)rint, (d)elete, (s)ubstitue (with g & I flags) - - edit commands: (a)ppend, (i)nsert, (c)hange - - file commands: (r)ead - - backreferences in substitution expressions (\0, \1, \2...\9) - - grouped commands: {cmd1;cmd2} - - transliteration (y/source-chars/dest-chars/) - - pattern space hold space storing / swapping (g, h, x) - - labels / branching (: label, b, t, T) - - (Note: Specifying an address (range) to match is *optional*; commands - default to the whole pattern space if no specific address match was - requested.) - - Todo: - - Create a wrapper around regex to make libc's regex conform with sed - - Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html -*/ +/* Supported features and commands in this version of sed: + * + * - comments ('#') + * - address matching: num|/matchstr/[,num|/matchstr/|$]command + * - commands: (p)rint, (d)elete, (s)ubstitute (with g & I flags) + * - edit commands: (a)ppend, (i)nsert, (c)hange + * - file commands: (r)ead + * - backreferences in substitution expressions (\0, \1, \2...\9) + * - grouped commands: {cmd1;cmd2} + * - transliteration (y/source-chars/dest-chars/) + * - pattern space hold space storing / swapping (g, h, x) + * - labels / branching (: label, b, t, T) + * + * (Note: Specifying an address (range) to match is *optional*; commands + * default to the whole pattern space if no specific address match was + * requested.) 
+ * + * Todo: + * - Create a wrapper around regex to make libc's regex conform with sed + * + * Reference http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html + */ //usage:#define sed_trivial_usage //usage: "[-efinr] SED_CMD [FILE]..." @@ -244,11 +242,13 @@ static int index_of_next_unescaped_regexp_delim(int delimiter, const char *str) delimiter = -delimiter; } - for (; (ch = str[idx]); idx++) { + for (; (ch = str[idx]) != '\0'; idx++) { if (bracket >= 0) { - if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 - && str[idx - 1] == '^'))) + if (ch == ']' + && !(bracket == idx - 1 || (bracket == idx - 2 && str[idx - 1] == '^')) + ) { bracket = -1; + } } else if (escaped) escaped = 0; else if (ch == '\\') @@ -434,11 +434,47 @@ static int parse_subst_cmd(sed_cmd_t *sed_cmd, const char *substr) */ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) { + static const char cmd_letters[] = "saicrw:btTydDgGhHlnNpPqx={}"; + enum { + IDX_s = 0, + IDX_a, + IDX_i, + IDX_c, + IDX_r, + IDX_w, + IDX_colon, + IDX_b, + IDX_t, + IDX_T, + IDX_y, + IDX_d, + IDX_D, + IDX_g, + IDX_G, + IDX_h, + IDX_H, + IDX_l, + IDX_n, + IDX_N, + IDX_p, + IDX_P, + IDX_q, + IDX_x, + IDX_equal, + IDX_lbrace, + IDX_rbrace, + IDX_nul + }; + struct chk { char chk[sizeof(cmd_letters)-1 == IDX_nul ? 
1 : -1]; }; + + unsigned idx = strchrnul(cmd_letters, sed_cmd->cmd) - cmd_letters; + /* handle (s)ubstitution command */ - if (sed_cmd->cmd == 's') + if (idx == IDX_s) { cmdstr += parse_subst_cmd(sed_cmd, cmdstr); + } /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ - else if (strchr("aic", sed_cmd->cmd)) { + else if (idx <= IDX_c) { /* a,i,c */ if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') bb_error_msg_and_die("only a beginning address can be specified for edit commands"); for (;;) { @@ -454,8 +490,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) /* "\anychar" -> "anychar" */ parse_escapes(sed_cmd->string, sed_cmd->string, strlen(cmdstr), '\0', '\0'); cmdstr += strlen(cmdstr); + } /* handle file cmds: (r)ead */ - } else if (strchr("rw", sed_cmd->cmd)) { + else if (idx <= IDX_w) { /* r,w */ if (sed_cmd->end_line || sed_cmd->end_match) bb_error_msg_and_die("command only uses one address"); cmdstr += parse_file_cmd(/*sed_cmd,*/ cmdstr, &sed_cmd->string); @@ -463,8 +500,9 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) sed_cmd->sw_file = xfopen_for_write(sed_cmd->string); sed_cmd->sw_last_char = '\n'; } + } /* handle branch commands */ - } else if (strchr(":btT", sed_cmd->cmd)) { + else if (idx <= IDX_T) { /* :,b,t,T */ int length; cmdstr = skip_whitespace(cmdstr); @@ -475,7 +513,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) } } /* translation command */ - else if (sed_cmd->cmd == 'y') { + else if (idx == IDX_y) { char *match, *replace; int i = cmdstr[0]; @@ -495,7 +533,7 @@ static const char *parse_cmd_args(sed_cmd_t *sed_cmd, const char *cmdstr) /* if it wasnt a single-letter command that takes no arguments * then it must be an invalid command. 
*/ - else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { + else if (idx >= IDX_nul) { /* not d,D,g,G,h,H,l,n,N,p,P,q,x,=,{,} */ bb_error_msg_and_die("unsupported command %c", sed_cmd->cmd); } @@ -966,9 +1004,9 @@ static void process_files(void) } sed_cmd->in_match = !( /* has the ending line come, or is this a single address command? */ - (sed_cmd->end_line ? - sed_cmd->end_line == -1 ? - !next_line + (sed_cmd->end_line + ? sed_cmd->end_line == -1 + ? !next_line : (sed_cmd->end_line <= linenum) : !sed_cmd->end_match )