Patch from Rob Landley

Fixed a memory leak in add_cmd/add_cmd_str by moving the allocation of sed_cmd down to where it's actually first needed. In get_address, if index_of_next_unescaped_regexp_delim ever failed, we wouldn't notice, because its -1 return value was added to idx, which was already guaranteed to be > 0, so the "idx == -1" check could never fire. (This is buried in the changes made when I redid get_address to be based on pointer arithmetic, because all the tests were gratuitously dereferencing with a constant zero, which wasn't obvious.) Comment in parse_regex_delim was wrong: 's' and 'y' both call it. The reason "sed_cmd->num_backrefs = 0;" isn't needed is that sed_cmd was allocated with xcalloc, which zeroes memory. Different handling of space after \ in i... Different handling of pattern "s/a/b s/c/d". Cool, recursive reads don't cause a crash. :) Fixed "sed -f blah filename - < filename" since GNU sed was handling both - and filenames on the same line. (You can even list - more than once, although it's immediate EOF...)
2024-07-08 01:28:56 +00:00 · 2003-09-14 04:06:12 +00:00 · 2003-09-14 04:06:12 +00:00 · 8aac05bfe5
commit 8aac05bfe5
parent 7c59a83a77
1 changed file with 47 additions and 60 deletions
--- a/editors/sed.c
+++ b/editors/sed.c
@ -73,7 +73,6 @@ typedef struct sed_cmd_s {
 	/* inversion flag */
 	int invert;			/* the '!' after the address */
 //	int block_cmd;	/* This command is part of a group that has a command address */
 	/* Runtime flag no not if the current command match's */
 	int still_in_range;
@ -193,13 +192,16 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter,
 	return -1;
 }
 /*
 *  Returns the index of the third delimiter
 */
 static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 {
 	const char *cmdstr_ptr = cmdstr;
 	char delimiter;
 	int idx = 0;
-	/* verify that the 's' is followed by something.  That something
+	/* verify that the 's' or 'y' is followed by something.  That something
 	 * (typically a 'slash') is now our regexp delimiter... */
 	if (*cmdstr == '\0')
 		bb_error_msg_and_die(bad_format_in_subst);
@ -231,38 +233,35 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace)
 */
 static int get_address(char *my_str, int *linenum, regex_t ** regex)
 {
-	int idx = 0;
+	char *pos=my_str;
-	if (isdigit(my_str[idx])) {
+	if (isdigit(*my_str)) {
-		char *endstr;
+		*linenum = strtol(my_str, &pos, 10);
 		*linenum = strtol(my_str, &endstr, 10);
 		/* endstr shouldnt ever equal NULL */
-		idx = endstr - my_str;
+	} else if (*my_str == '$') {
 	} else if (my_str[idx] == '$') {
 		*linenum = -1;
-		idx++;
+		pos++;
-	} else if (my_str[idx] == '/' || my_str[idx] == '\\') {
+	} else if (*my_str == '/' || *my_str == '\\') {
-		int idx_start = 1;
+		int next, idx_start = 1;
 		char delimiter;
 		delimiter = '/';
-		if (my_str[idx] == '\\') {
+		if (*my_str == '\\') {
 			idx_start++;
-			delimiter = my_str[++idx];
+			delimiter = *(++pos);
 		}
-		idx++;
+		next = index_of_next_unescaped_regexp_delim(delimiter, ++pos);
-		idx += index_of_next_unescaped_regexp_delim(delimiter, my_str + idx);
+		if (next == -1) {
 		if (idx == -1) {
 			bb_error_msg_and_die("unterminated match expression");
 		}
-		my_str[idx] = '\0';
+		pos += next;
 		*pos = '\0';
 		*regex = (regex_t *) xmalloc(sizeof(regex_t));
 		xregcomp(*regex, my_str + idx_start, REG_NEWLINE);
-		idx++;			/* so it points to the next character after the last '/' */
+		pos++;			/* so it points to the next character after the last '/' */
 	}
-	return idx;
+	return pos - my_str;
 }
 static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
@ -287,7 +286,6 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
 	 * function to save processor time, at the expense of a little more memory
 	 * (4 bits) per sed_cmd */
 	/* sed_cmd->num_backrefs = 0; *//* XXX: not needed? --apparently not */
 	for (j = 0; match[j]; j++) {
 		/* GNU/POSIX sed does not save more than nine backrefs */
 		if (match[j] == '\\' && match[j + 1] == '('
@ -333,17 +331,17 @@ static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr)
 static void replace_slash_n(char *string)
 {
-	int i;
+	char *dest;
 	int remaining = strlen(string);
-	for (i = 0; string[i]; i++) {
+	for (dest = string; *string; string++, dest++) {
-		if ((string[i] == '\\') && (string[i + 1] == 'n')) {
+		if ((string[0] == '\\') && (string[1] == 'n')) {
-			string[i] = '\n';
+			*dest = '\n';
-			memmove(string + i + 1, string + i + 1, remaining - 1);
+			string++;
 		} else {
-			remaining--;
+			*dest = *string;
 		}
 	}
 	*dest=0;
 }
 static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr)
@ -431,7 +429,7 @@ static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr)
 	 *   re: the file to be read, the GNU manual says the following: "Note that
 	 *   if filename cannot be read, it is treated as if it were an empty file,
 	 *   without any error indication." Thus, all of the following commands are
-	 *   perfectly leagal:
+	 *   perfectly legal:
 	 *
 	 *   sed -e '1r noexist'
 	 *   sed -e '1r ;'
@ -496,8 +494,10 @@ static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr)
 	return (cmdstr);
 }
-static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
+static char *add_cmd(char *cmdstr)
 {
 	sed_cmd_t *sed_cmd;
 	/* Skip over leading whitespace and semicolons */
 	cmdstr += strspn(cmdstr, semicolon_whitespace);
@ -522,6 +522,8 @@ static char *add_cmd(sed_cmd_t *sed_cmd, char *cmdstr)
 	 *            part1 part2  part3
 	 */
 	sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
 	/* first part (if present) is an address: either a '$', a number or a /regex/ */
 	cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match);
@ -595,10 +597,7 @@ static void add_cmd_str(char *cmdstr)
 	}
 #endif
 	do {
-		sed_cmd_t *sed_cmd;
+		cmdstr = add_cmd(cmdstr);
 		sed_cmd = xcalloc(1, sizeof(sed_cmd_t));
 		cmdstr = add_cmd(sed_cmd, cmdstr);
 	} while (cmdstr && strlen(cmdstr));
 }
@ -651,18 +650,6 @@ void pipe_putc(struct pipeline *const pipeline, char c)
 #define pipeputc(c) 	pipe_putc(pipeline, c)
 #if 0
 {
 	if (pipeline[pipeline_idx] == PIPE_MAGIC) {
 		pipeline = xrealloc(pipeline, pipeline_len + PIPE_GROW);
 		memset(pipeline + pipeline_len, 0, PIPE_GROW);
 		pipeline_len += PIPE_GROW;
 		pipeline[pipeline_len - 1] = PIPE_MAGIC;
 	}
 	pipeline[pipeline_idx++] = (c);
 }
 #endif
 static void print_subst_w_backrefs(const char *line, const char *replace,
 	regmatch_t * regmatch, struct pipeline *const pipeline, int matches)
 {
@ -1157,31 +1144,31 @@ extern int sed_main(int argc, char **argv)
 	if (sed_cmd_head.next == NULL) {
 		if (argv[optind] == NULL)
 			bb_show_usage();
-		else {
+		else
-			char *str_cmd = strdup(argv[optind]);
+			add_cmd_str(strdup(argv[optind++]));
 			add_cmd_str(str_cmd);
 			free(str_cmd);
 			optind++;
 		}
 	}
 	/* argv[(optind)..(argc-1)] should be names of file to process. If no
 	 * files were specified or '-' was specified, take input from stdin.
 	 * Otherwise, we process all the files specified. */
-	if (argv[optind] == NULL || (strcmp(argv[optind], "-") == 0)) {
+	if (argv[optind] == NULL) {
 		process_file(stdin);
 	} else {
 		int i;
 		FILE *file;
 		for (i = optind; i < argc; i++) {
-			file = bb_wfopen(argv[i], "r");
+			if(!strcmp(argv[i], "-")) {
-			if (file) {
+				process_file(stdin);
-				process_file(file);
+			} else {
-				fclose(file);
+				file = bb_wfopen(argv[i], "r");
-			} else
+				if (file) {
-				status = EXIT_FAILURE;
+					process_file(file);
 					fclose(file);
 				} else {
 					status = EXIT_FAILURE;
 				}
 			}
 		}
 	}