From 36f774a0cd2bf8dd72b192aab93831c5ac0c58f0 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Sun, 5 Sep 2010 14:45:38 +0200 Subject: [PATCH] hush: add support for ${var/pattern/repl}, conditional on bash compat function old new delta expand_vars_to_list 2386 2833 +447 expand_string_to_string 69 110 +41 parse_dollar 681 721 +40 hush_main 963 945 -18 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 3/1 up/down: 528/-18) Total: 510 bytes Signed-off-by: Denys Vlasenko --- shell/ash_test/ash-vars/var_bash3.tests | 7 - shell/hush.c | 179 ++++++++++++++++++---- shell/hush_test/hush-vars/var_bash1.right | 14 ++ shell/hush_test/hush-vars/var_bash1.tests | 18 +++ shell/hush_test/hush-vars/var_bash2.right | 10 ++ shell/hush_test/hush-vars/var_bash2.tests | 24 +++ shell/hush_test/hush-vars/var_bash3.right | 20 +++ shell/hush_test/hush-vars/var_bash3.tests | 41 +++++ shell/hush_test/hush-vars/var_bash4.right | 23 +++ shell/hush_test/hush-vars/var_bash4.tests | 47 ++++++ shell/hush_test/hush-vars/var_bash5.right | 4 + shell/hush_test/hush-vars/var_bash5.tests | 11 ++ 12 files changed, 362 insertions(+), 36 deletions(-) create mode 100644 shell/hush_test/hush-vars/var_bash1.right create mode 100755 shell/hush_test/hush-vars/var_bash1.tests create mode 100644 shell/hush_test/hush-vars/var_bash2.right create mode 100755 shell/hush_test/hush-vars/var_bash2.tests create mode 100644 shell/hush_test/hush-vars/var_bash3.right create mode 100755 shell/hush_test/hush-vars/var_bash3.tests create mode 100644 shell/hush_test/hush-vars/var_bash4.right create mode 100755 shell/hush_test/hush-vars/var_bash4.tests create mode 100644 shell/hush_test/hush-vars/var_bash5.right create mode 100755 shell/hush_test/hush-vars/var_bash5.tests diff --git a/shell/ash_test/ash-vars/var_bash3.tests b/shell/ash_test/ash-vars/var_bash3.tests index eca3318e2..146dbb6a5 100755 --- a/shell/ash_test/ash-vars/var_bash3.tests +++ 
b/shell/ash_test/ash-vars/var_bash3.tests @@ -3,13 +3,6 @@ r=${a//b/\041#} echo 1 $r echo 2 ${a//b/\041#} echo 3 "${a//b/\041#}" -# --- var_bash3.xx -# +++ var_bash3.right -# -1 a\041#c -# +1 a041#c -# 2 a041#c -# -3 a041#c -# +3 a\041#c a='abc' r=${a//b/\\041#} diff --git a/shell/hush.c b/shell/hush.c index 4f80b7d83..9a08e90c9 100644 --- a/shell/hush.c +++ b/shell/hush.c @@ -50,7 +50,6 @@ * * Bash compat TODO: * redirection of stdout+stderr: &> and >& - * subst operator: ${var/[/]expr/expr} * brace expansion: one/{two,three,four} * reserved words: function select * advanced test: [[ ]] @@ -330,6 +329,17 @@ #define _SPECIAL_VARS_STR "_*@$!?#" #define SPECIAL_VARS_STR ("_*@$!?#" + 1) #define NUMERIC_SPECVARS_STR ("_*@$!?#" + 3) +#if ENABLE_HUSH_BASH_COMPAT +/* Support / and // replace ops */ +/* Note that // is stored as \ in "encoded" string representation */ +# define VAR_ENCODED_SUBST_OPS "\\/%#:-=+?" +# define VAR_SUBST_OPS ("\\/%#:-=+?" + 1) +# define MINUS_PLUS_EQUAL_QUESTION ("\\/%#:-=+?" + 5) +#else +# define VAR_ENCODED_SUBST_OPS "%#:-=+?" +# define VAR_SUBST_OPS "%#:-=+?" +# define MINUS_PLUS_EQUAL_QUESTION ("%#:-=+?" 
+ 3) +#endif #define SPECIAL_VAR_SYMBOL 3 @@ -2600,6 +2610,60 @@ static arith_t expand_and_evaluate_arith(const char *arg, int *errcode_p) } #endif +#if ENABLE_HUSH_BASH_COMPAT +/* ${var/[/]pattern[/repl]} helpers */ +static char *strstr_pattern(char *val, const char *pattern, int *size) +{ + while (1) { + char *end = scan_and_match(val, pattern, SCAN_MOVE_FROM_RIGHT + SCAN_MATCH_LEFT_HALF); + debug_printf_varexp("val:'%s' pattern:'%s' end:'%s'\n", val, pattern, end); + if (end) { + *size = end - val; + return val; + } + if (*val == '\0') + return NULL; + /* Optimization: if "*pat" did not match the start of "string", + * we know that "tring", "ring" etc will not match too: + */ + if (pattern[0] == '*') + return NULL; + val++; + } +} +static char *replace_pattern(char *val, const char *pattern, const char *repl, char exp_op) +{ + char *result = NULL; + unsigned res_len = 0; + unsigned repl_len = strlen(repl); + + while (1) { + int size; + char *s = strstr_pattern(val, pattern, &size); + if (!s) + break; + + result = xrealloc(result, res_len + (s - val) + repl_len + 1); + memcpy(result + res_len, val, s - val); + res_len += s - val; + strcpy(result + res_len, repl); + res_len += repl_len; + debug_printf_varexp("val:'%s' s:'%s' result:'%s'\n", val, s, result); + + val = s + size; + if (exp_op == '/') + break; + } + if (val[0] && result) { + result = xrealloc(result, res_len + strlen(val) + 1); + strcpy(result + res_len, val); + debug_printf_varexp("val:'%s' result:'%s'\n", val, result); + } + debug_printf_varexp("result:'%s'\n", result); + return result; +} +#endif + /* Expand all variable references in given string, adding words to list[] * at n, n+1,... positions. Return updated n (so that list[n] is next one * to be filled). This routine is extremely tricky: has to deal with @@ -2750,7 +2814,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char var = arg; *p = '\0'; - exp_saveptr = arg[1] ? 
strchr("%#:-=+?", arg[1]) : NULL; + exp_saveptr = arg[1] ? strchr(VAR_ENCODED_SUBST_OPS, arg[1]) : NULL; first_char = arg[0] = first_ch & 0x7f; exp_op = 0; @@ -2767,7 +2831,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char exp_saveptr = var + 1; } else { /* ${?}, ${var}, ${var:0}, ${var[:]%0} etc */ - exp_saveptr = var+1 + strcspn(var+1, "%#:-=+?"); + exp_saveptr = var+1 + strcspn(var+1, VAR_ENCODED_SUBST_OPS); } exp_op = exp_save = *exp_saveptr; if (exp_op) { @@ -2775,7 +2839,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char if (exp_op == ':') { exp_op = *exp_word++; if (ENABLE_HUSH_BASH_COMPAT - && (exp_op == '\0' || !strchr("%#:-=+?"+3, exp_op)) + && (exp_op == '\0' || !strchr(MINUS_PLUS_EQUAL_QUESTION, exp_op)) ) { /* oops... it's ${var:N[:M]}, not ${var:?xxx} or some such */ exp_op = ':'; @@ -2799,7 +2863,7 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char val = utoa(G.root_pid); break; case '!': /* bg pid */ - val = G.last_bg_pid ? utoa(G.last_bg_pid) : (char*)""; + val = G.last_bg_pid ? 
utoa(G.last_bg_pid) : ""; break; case '?': /* exitcode */ val = utoa(G.last_exitcode); @@ -2843,13 +2907,47 @@ static NOINLINE int expand_vars_to_list(o_string *output, int n, char *arg, char // exp_op, to_be_freed, exp_word, loc); free(exp_exp_word); if (loc) { /* match was found */ - if (scan_flags & SCAN_MATCH_LEFT_HALF) /* # or ## */ + if (scan_flags & SCAN_MATCH_LEFT_HALF) /* #[#] */ val = loc; - else /* % or %% */ + else /* %[%] */ *loc = '\0'; } } - } else if (exp_op == ':') { + } +#if ENABLE_HUSH_BASH_COMPAT + else if (exp_op == '/' || exp_op == '\\') { + /* Empty variable always gives nothing: */ + // "v=''; echo ${v/*/w}" prints "" + if (val && val[0]) { + /* It's ${var/[/]pattern[/repl]} thing */ + char *pattern, *repl, *t; + pattern = expand_pseudo_dquoted(exp_word); + if (!pattern) + pattern = xstrdup(exp_word); + debug_printf_varexp("pattern:'%s'->'%s'\n", exp_word, pattern); + *p++ = SPECIAL_VAR_SYMBOL; + exp_word = p; + p = strchr(p, SPECIAL_VAR_SYMBOL); + *p = '\0'; + repl = expand_pseudo_dquoted(exp_word); + debug_printf_varexp("repl:'%s'->'%s'\n", exp_word, repl); + /* HACK ALERT. We depend here on the fact that + * G.global_argv and results of utoa and get_local_var_value + * are actually in writable memory: + * replace_pattern momentarily stores NULs there. */ + t = (char*)val; + to_be_freed = replace_pattern(t, + pattern, + (repl ? repl : exp_word), + exp_op); + if (to_be_freed) /* at least one replace happened */ + val = to_be_freed; + free(pattern); + free(repl); + } + } +#endif + else if (exp_op == ':') { #if ENABLE_HUSH_BASH_COMPAT && ENABLE_SH_MATH_SUPPORT /* It's ${var:N[:M]} bashism. * Note that in encoded form it has TWO parts: @@ -3084,6 +3182,16 @@ static char *expand_string_to_string(const char *str) { char *argv[2], **list; + /* This is generally an optimization, but it also + * handles "", which otherwise trips over !list[0] check below. + * (does it ever happen that we actually get str="" here?)
+ */ + if (!strchr(str, SPECIAL_VAR_SYMBOL) && !strchr(str, '\\')) { + //TODO: Can use on strings with \ too, just unbackslash() them? + debug_printf_expand("string_to_string(fast)='%s'\n", str); + return xstrdup(str); + } + argv[0] = (char*)str; argv[1] = NULL; list = expand_variables(argv, EXPVAR_FLAG_ESCAPE_VARS | EXPVAR_FLAG_SINGLEWORD); @@ -3271,7 +3379,7 @@ static void re_execute_shell(char ***to_free, const char *s, *pp++ = (char *) G.argv0_for_re_execing; *pp++ = param_buf; for (cur = G.top_var; cur; cur = cur->next) { - if (cur->varstr == hush_version_str) + if (strcmp(cur->varstr, hush_version_str) == 0) continue; if (cur->flg_read_only) { *pp++ = (char *) "-R"; @@ -6170,8 +6278,8 @@ static void add_till_backquote(o_string *dest, struct in_str *input) * * Also adapted to eat ${var%...} and $((...)) constructs, since ... part * can contain arbitrary constructs, just like $(cmd). - * In bash compat mode, it needs to also be able to stop on '}' or ':' - * for ${var:N[:M]} parsing. + * In bash compat mode, it needs to also be able to stop on ':' or '/' + * for ${var:N[:M]} and ${var/P[/R]} parsing. */ #define DOUBLE_CLOSE_CHAR_FLAG 0x80 static int add_till_closing_bracket(o_string *dest, struct in_str *input, unsigned end_ch) @@ -6323,19 +6431,30 @@ static int parse_dollar(o_string *as_string, /* handle parameter expansions * http://www.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_06_02 */ - if (!strchr("%#:-=+?", ch)) /* ${var... */ + if (!strchr(VAR_SUBST_OPS, ch)) /* ${var... */ goto bad_dollar_syntax; - o_addchr(dest, ch); /* Eat everything until closing '}' (or ':') */ end_ch = '}'; if (ENABLE_HUSH_BASH_COMPAT && ch == ':' - && !strchr("%#:-=+?"+3, i_peek(input)) + && !strchr(MINUS_PLUS_EQUAL_QUESTION, i_peek(input)) ) { /* It's ${var:N[:M]} thing */ end_ch = '}' * 0x100 + ':'; } + if (ENABLE_HUSH_BASH_COMPAT + && ch == '/' + ) { + /* It's ${var/[/]pattern[/repl]} thing */ + if (i_peek(input) == '/') { /* ${var//pattern[/repl]}? 
*/ + i_getch(input); + nommu_addchr(as_string, '/'); + ch = '\\'; + } + end_ch = '}' * 0x100 + '/'; + } + o_addchr(dest, ch); again: if (!BB_MMU) pos = dest->length; @@ -6352,14 +6471,18 @@ static int parse_dollar(o_string *as_string, if (ENABLE_HUSH_BASH_COMPAT && (end_ch & 0xff00)) { /* close the first block: */ o_addchr(dest, SPECIAL_VAR_SYMBOL); - /* while parsing N from ${var:N[:M]}... */ + /* while parsing N from ${var:N[:M]} + * or pattern from ${var/[/]pattern[/repl]} */ if ((end_ch & 0xff) == last_ch) { - /* ...got ':' - parse the rest */ + /* got ':' or '/'- parse the rest */ end_ch = '}'; goto again; } - /* ...got '}', not ':' - it's ${var:N}! emulate :999999999 */ - o_addstr(dest, "999999999"); + /* got '}' */ + if (end_ch == '}' * 0x100 + ':') { + /* it's ${var:N} - emulate :999999999 */ + o_addstr(dest, "999999999"); + } /* else: it's ${var/[/]pattern} */ } break; } @@ -7186,13 +7309,6 @@ static int set_mode(const char cstate, const char mode) int hush_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE; int hush_main(int argc, char **argv) { - static const struct variable const_shell_ver = { - .next = NULL, - .varstr = (char*)hush_version_str, - .max_len = 1, /* 0 can provoke free(name) */ - .flg_export = 1, - .flg_read_only = 1, - }; int opt; unsigned builtin_argc; char **e; @@ -7205,10 +7321,18 @@ int hush_main(int argc, char **argv) G.argv0_for_re_execing = argv[0]; #endif /* Deal with HUSH_VERSION */ - G.shell_ver = const_shell_ver; /* copying struct here */ + G.shell_ver.flg_export = 1; + G.shell_ver.flg_read_only = 1; + /* Code which handles ${var/P/R} needs writable values for all variables, + * therefore we xstrdup: */ + G.shell_ver.varstr = xstrdup(hush_version_str), G.top_var = &G.shell_ver; debug_printf_env("unsetenv '%s'\n", "HUSH_VERSION"); unsetenv("HUSH_VERSION"); /* in case it exists in initial env */ + /* reinstate HUSH_VERSION in environment */ + debug_printf_env("putenv '%s'\n", G.shell_ver.varstr); + putenv(G.shell_ver.varstr); + 
/* Initialize our shell local variables with the values * currently living in the environment */ cur_var = G.top_var; @@ -7224,9 +7348,6 @@ int hush_main(int argc, char **argv) } e++; } - /* reinstate HUSH_VERSION */ - debug_printf_env("putenv '%s'\n", hush_version_str); - putenv((char *)hush_version_str); /* Export PWD */ set_pwd_var(/*exp:*/ 1); diff --git a/shell/hush_test/hush-vars/var_bash1.right b/shell/hush_test/hush-vars/var_bash1.right new file mode 100644 index 000000000..c0a07699b --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash1.right @@ -0,0 +1,14 @@ + + +f +bcdef +abcdef +abcdef +bcde +abcd +abcd +abcdef +bcdef +abcdef +abcdef +abcdef diff --git a/shell/hush_test/hush-vars/var_bash1.tests b/shell/hush_test/hush-vars/var_bash1.tests new file mode 100755 index 000000000..24d3c9a00 --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash1.tests @@ -0,0 +1,18 @@ +var=abcdef + +echo ${var:7} +echo ${var:6} +echo ${var:5} +echo ${var:1} +echo ${var:0} +echo ${var:-1} + +echo ${var:1:4} +echo ${var:0:4} +echo ${var::4} +echo ${var:-1:4} + +echo ${var:1:7} +echo ${var:0:7} +echo ${var::7} +echo ${var:-1:7} diff --git a/shell/hush_test/hush-vars/var_bash2.right b/shell/hush_test/hush-vars/var_bash2.right new file mode 100644 index 000000000..acba5c6fb --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash2.right @@ -0,0 +1,10 @@ +abc123xcba123 +abx123dcba123 +abx123dxba123 +abcx23dcba123 +abcxxxdcbaxxx +abx +xba123 +abx23 +abc23dcba123 +abcdcba diff --git a/shell/hush_test/hush-vars/var_bash2.tests b/shell/hush_test/hush-vars/var_bash2.tests new file mode 100755 index 000000000..29c526cef --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash2.tests @@ -0,0 +1,24 @@ +var=abc123dcba123 + +echo ${var/d/x} +echo ${var/c/x} +echo ${var//c/x} +echo ${var/[123]/x} +echo ${var//[123]/x} +echo ${var/c*/x} +echo ${var/*c/x} + +# must match longest match: result is "abx23" +echo ${var/c*1/x} + +# empty replacement - 2nd slash can be omitted +echo ${var/[123]} +echo 
${var//[123]} + +### ash doesn't support +### # match only at the beginning: +### echo ${var/#a/x} +### echo ${var/#b/x} # should not match +### echo ${var//#b/x} # should not match +### # match only at the end: +### echo ${var/%3/x} diff --git a/shell/hush_test/hush-vars/var_bash3.right b/shell/hush_test/hush-vars/var_bash3.right new file mode 100644 index 000000000..a97c850ea --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash3.right @@ -0,0 +1,20 @@ +1 a041#c +2 a041#c +3 a\041#c +4 a\041#c +5 a\041#c +6 a\041#c +7 a\041#c +8 a\041#c +9 a\041#c +10 a\c +11 a\c +12 a\c +13 a\\c +14 a\\c +15 a\\c +16 a\tc +17 a\tc +18 a\tc +19 atc +20 a\tc diff --git a/shell/hush_test/hush-vars/var_bash3.tests b/shell/hush_test/hush-vars/var_bash3.tests new file mode 100755 index 000000000..146dbb6a5 --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash3.tests @@ -0,0 +1,41 @@ +a='abc' +r=${a//b/\041#} +echo 1 $r +echo 2 ${a//b/\041#} +echo 3 "${a//b/\041#}" + +a='abc' +r=${a//b/\\041#} +echo 4 $r +echo 5 ${a//b/\\041#} +echo 6 "${a//b/\\041#}" + +a='abc' +b='\041#' +r=${a//b/$b} +echo 7 $r +echo 8 ${a//b/$b} +echo 9 "${a//b/$b}" + +a='abc' +b='\' +r="${a//b/$b}" +echo 10 $r +echo 11 ${a//b/$b} +echo 12 "${a//b/$b}" + +a='abc' +b='\\' +r="${a//b/$b}" +echo 13 $r +echo 14 ${a//b/$b} +echo 15 "${a//b/$b}" + +a='abc' +b='\t' +r="${a//b/$b}" +echo 16 $r +echo 17 ${a//b/$b} +echo 18 "${a//b/$b}" +echo 19 ${a//b/\t} +echo 20 "${a//b/\t}" diff --git a/shell/hush_test/hush-vars/var_bash4.right b/shell/hush_test/hush-vars/var_bash4.right new file mode 100644 index 000000000..600e8532f --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash4.right @@ -0,0 +1,23 @@ +Source: a*b\*c +Replace str: _\\_\z_ +Pattern: single backslash and star: "replace literal star" +In assignment: a_\_z_b\*c +Unquoted: a_\_z_b\*c +Quoted: a_\_\z_b\*c +Pattern: double backslash and star: "replace backslash and everything after it" +In assignment: a*b_\_z_ +Unquoted: a*b_\_z_ +Quoted: a*b_\_\z_ + +Source: a\bc 
+Replace str: _\\_\z_ +Pattern: single backslash and b: "replace b" +In assignment: a\_\_z_c +Unquoted: a\_\_z_c +Quoted: a\_\_\z_c +Pattern: double backslash and b: "replace backslash and b" +In assignment: a_\_z_c +Unquoted: a_\_z_c +Quoted: a_\_\z_c + +Done: 0 diff --git a/shell/hush_test/hush-vars/var_bash4.tests b/shell/hush_test/hush-vars/var_bash4.tests new file mode 100755 index 000000000..d5470614b --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash4.tests @@ -0,0 +1,47 @@ +# This testcase demonstrates that backslashes are treated differently +# in 1st and 2nd parts of ${var/search/repl}: +# if quoted ("${var/search/repl}"), and repl contains \a (a non-special char), +# the backslash in repl stays; if unquoted, backslash is removed. +# But search part does not act like that: \a is always converted to just a, +# even in quotes. +# +# bash4 (and probably bash3 too): "Quoted:" results are different from +# unquoted and assignment expansions - they have a backslash before z. + +v='a*b\*c' +echo 'Source: ' "$v" +echo 'Replace str: ' '_\\_\z_' + +echo 'Pattern: ' 'single backslash and star: "replace literal star"' +r=${v/\*/_\\_\z_} +echo 'In assignment:' "$r" +echo 'Unquoted: ' ${v/\*/_\\_\z_} +echo 'Quoted: ' "${v/\*/_\\_\z_}" + +echo 'Pattern: ' 'double backslash and star: "replace backslash and everything after it"' +r=${v/\\*/_\\_\z_} +echo 'In assignment:' "$r" +echo 'Unquoted: ' ${v/\\*/_\\_\z_} +echo 'Quoted: ' "${v/\\*/_\\_\z_}" + +echo + +v='a\bc' +echo 'Source: ' "$v" +echo 'Replace str: ' '_\\_\z_' + +echo 'Pattern: ' 'single backslash and b: "replace b"' +r=${v/\b/_\\_\z_} +echo 'In assignment:' "$r" +echo 'Unquoted: ' ${v/\b/_\\_\z_} +echo 'Quoted: ' "${v/\b/_\\_\z_}" + +echo 'Pattern: ' 'double backslash and b: "replace backslash and b"' +r=${v/\\b/_\\_\z_} +echo 'In assignment:' "$r" +echo 'Unquoted: ' ${v/\\b/_\\_\z_} +echo 'Quoted: ' "${v/\\b/_\\_\z_}" + +echo + +echo Done: $? 
diff --git a/shell/hush_test/hush-vars/var_bash5.right b/shell/hush_test/hush-vars/var_bash5.right new file mode 100644 index 000000000..278ed3228 --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash5.right @@ -0,0 +1,4 @@ +a/ +a/d +a/e/f +Done: 0 diff --git a/shell/hush_test/hush-vars/var_bash5.tests b/shell/hush_test/hush-vars/var_bash5.tests new file mode 100755 index 000000000..7f482a554 --- /dev/null +++ b/shell/hush_test/hush-vars/var_bash5.tests @@ -0,0 +1,11 @@ +# This testcase checks whether slashes in ${v/a/b} are parsed before +# or after expansions + +v='a/b/c' +s='b/c' +r='e/f' +echo "${v/$s}" +echo "${v/$s/d}" +echo "${v/$s/$r}" + +echo Done: $?