diff options
-rw-r--r-- | editors/Config.in | 22 | ||||
-rw-r--r-- | editors/sed.c | 1458 | ||||
-rw-r--r-- | include/libbb.h | 3 |
3 files changed, 680 insertions, 803 deletions
diff --git a/editors/Config.in b/editors/Config.in index bced12cb1..b491c2416 100644 --- a/editors/Config.in +++ b/editors/Config.in @@ -33,28 +33,6 @@ config CONFIG_SED sed is used to perform text transformations on a file or input from a pipeline. -config CONFIG_FEATURE_SED_EMBEDED_NEWLINE - bool " Embeded newline (EXPERIMENTAL)" - default n - depends on CONFIG_SED - help - This is a hack to allow matching of '\n' in regular expressions. - It works by translating '\n' to "\n" and back. - It may introduce unexpected results if you use "\n" in your text. - -config CONFIG_FEATURE_SED_GNU_COMPATABILITY - bool " Behave consistent with GNU sed" - default y - depends on CONFIG_SED - help - Where GNU sed doesnt follow the posix standard, do as GNU sed does. - Current difference are in - - N command with odd number of lines (see GNU sed info page) - - Blanks before substitution flags eg. - GNU sed interprets 's/a/b/ g' as 's/a/b/g' - Standard says 's/a/b/ g' should be 's/a/b/;g' - - GNU sed allows blanks between a '!' and the function. - config CONFIG_VI bool "vi" default n diff --git a/editors/sed.c b/editors/sed.c index 1c016ac57..6452a321c 100644 --- a/editors/sed.c +++ b/editors/sed.c @@ -1,3 +1,4 @@ +/* vi: set sw=4 ts=4: */ /* * sed.c - very minimalist version of sed * @@ -22,6 +23,24 @@ * */ +/* Code overview. + + Files are laid out to avoid unnecessary function declarations. So for + example, every function add_cmd calls occurs before add_cmd in this file. + + add_cmd() is called on each line of sed command text (from a file or from + the command line). It calls get_address() and parse_cmd_args(). The + resulting sed_cmd_t structures are appended to a linked list + (sed_cmd_head/sed_cmd_tail). + + process_file() does actual sedding, reading data lines from an input FILE * + (which could be stdin) and applying the sed command list (sed_cmd_head) to + each of the resulting lines. + + sed_main() is where external code calls into this, with a command line. +*/ + + /* Supported features and commands in this version of sed: @@ -64,84 +83,72 @@ #include "busybox.h" typedef struct sed_cmd_s { - /* Order by alignment requirements */ - - /* address storage */ - regex_t *beg_match; /* sed -e '/match/cmd' */ - regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ - - int beg_line; /* 'sed 1p' 0 == no begining line, apply commands to all lines */ - int end_line; /* 'sed 1,3p' 0 == no end line, use only beginning. -1 == $ */ + /* Ordered by alignment requirements: currently 36 bytes on x86 */ - /* inversion flag */ - int invert; /* the '!' after the address */ + /* address storage */ + regex_t *beg_match; /* sed -e '/match/cmd' */ + regex_t *end_match; /* sed -e '/match/,/end_match/cmd' */ + regex_t *sub_match; /* For 's/sub_match/string/' */ + int beg_line; /* 'sed 1p' 0 == apply commands to all lines */ + int end_line; /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */ - /* Runtime flag no not if the current command match's */ - int still_in_range; + FILE *file; /* File (sr) command writes to, -1 for none. */ + char *string; /* Data string for (saicytb) commands. */ - /* SUBSTITUTION COMMAND SPECIFIC FIELDS */ + unsigned short which_match; /* (s) Which match to replace (0 for all) */ - /* sed -e 's/sub_match/replace/' */ - regex_t *sub_match; - char *replace; + /* Bitfields (gcc won't group them if we don't) */ + unsigned int invert:1; /* the '!' after the address */ + unsigned int in_match:1; /* Next line also included in match? */ + unsigned int no_newline:1; /* Last line written by (sr) had no '\n' */ + unsigned int sub_p:1; /* (s) print option */ - /* EDIT COMMAND (a,i,c) SPECIFIC FIELDS */ - char *editline; - - /* FILE COMMAND (r) SPECIFIC FIELDS */ - char *filename; - - /* SUBSTITUTION COMMAND SPECIFIC FIELDS */ - - unsigned int num_backrefs:4; /* how many back references (\1..\9) */ - /* Note: GNU/POSIX sed does not save more than nine backrefs, so - * we only use 4 bits to hold the number */ - unsigned int sub_g:1; /* sed -e 's/foo/bar/g' (global) */ - unsigned int sub_p:1; /* sed -e 's/foo/bar/p' (print substitution) */ - - /* TRANSLATE COMMAND */ - char *translate; - - /* GENERAL FIELDS */ - /* the command */ - char cmd; /* p,d,s (add more at your leisure :-) */ - - /* Branch commands */ - char *label; - - /* next command in list (sequential list of specified commands) */ - struct sed_cmd_s *next; + /* GENERAL FIELDS */ + char cmd; /* The command char: abcdDgGhHilnNpPqrstwxy:={} */ + struct sed_cmd_s *next; /* Next command (linked list, NULL terminated) */ } sed_cmd_t; - -/* externs */ -extern void xregcomp(regex_t * preg, const char *regex, int cflags); -extern int optind; /* in unistd.h */ -extern char *optarg; /* ditto */ - /* globals */ /* options */ static int be_quiet = 0; + static const char bad_format_in_subst[] = "bad format in substitution expression"; +const char *const semicolon_whitespace = "; \n\r\t\v"; + +regmatch_t regmatch[10]; +static regex_t *previous_regex_ptr = NULL; /* linked list of sed commands */ static sed_cmd_t sed_cmd_head; static sed_cmd_t *sed_cmd_tail = &sed_cmd_head; -const char *const semicolon_whitespace = "; \n\r\t\v\0"; -static regex_t *previous_regex_ptr = NULL; - +/* Linked list of append lines */ +struct append_list { + char *string; + struct append_list *next; +}; +struct append_list *append_head=NULL, *append_tail=NULL; #ifdef CONFIG_FEATURE_CLEAN_UP -static void destroy_cmd_strs(void) +static void free_and_close_stuff(void) { sed_cmd_t *sed_cmd = sed_cmd_head.next; + while(append_head) { + append_tail=append_head->next; + free(append_head->string); + free(append_head); + append_head=append_tail; + } + while (sed_cmd) { sed_cmd_t *sed_cmd_next = sed_cmd->next; + if(sed_cmd->file) + bb_xprint_and_close_file(sed_cmd->file); + if (sed_cmd->beg_match) { regfree(sed_cmd->beg_match); free(sed_cmd->beg_match); @@ -154,17 +161,41 @@ static void destroy_cmd_strs(void) regfree(sed_cmd->sub_match); free(sed_cmd->sub_match); } - free(sed_cmd->replace); - free(sed_cmd->editline); - free(sed_cmd->filename); - free(sed_cmd->translate); - free(sed_cmd->label); + free(sed_cmd->string); free(sed_cmd); sed_cmd = sed_cmd_next; } } #endif +/* strdup, replacing "\n" with '\n', and "\delimiter" with 'delimiter' */ + +static void parse_escapes(char *dest, const char *string, int len, char from, char to) +{ + int i=0; + + while(i<len) { + if(string[i] == '\\') { + if(string[i+1] == from) { + *(dest++) = to; + i+=2; + continue; + } else *(dest++)=string[i++]; + } + *(dest++) = string[i++]; + } + *dest=0; +} + +static char *copy_parsing_slashn(const char *string, int len) +{ + char *dest=xmalloc(len+1); + + parse_escapes(dest,string,len,'n','\n'); + return dest; +} + + /* * index_of_next_unescaped_regexp_delim - walks left to right through a string * beginning at a specified index and returns the index of the next regular @@ -182,7 +213,7 @@ static int index_of_next_unescaped_regexp_delim(const char delimiter, for (; (ch = str[idx]); idx++) { if (bracket != -1) { if (ch == ']' && !(bracket == idx - 1 || (bracket == idx - 2 - && str[idx - 1] == '^'))) + && str[idx - 1] == '^'))) bracket = -1; } else if (escaped) escaped = 0; @@ -209,19 +240,15 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) /* verify that the 's' or 'y' is followed by something. That something * (typically a 'slash') is now our regexp delimiter... */ - if (*cmdstr == '\0') - bb_error_msg_and_die(bad_format_in_subst); - else - delimiter = *cmdstr_ptr; - - cmdstr_ptr++; + if (*cmdstr == '\0') bb_error_msg_and_die(bad_format_in_subst); + delimiter = *(cmdstr_ptr++); /* save the match string */ idx = index_of_next_unescaped_regexp_delim(delimiter, cmdstr_ptr); if (idx == -1) { bb_error_msg_and_die(bad_format_in_subst); } - *match = bb_xstrndup(cmdstr_ptr, idx); + *match = copy_parsing_slashn(cmdstr_ptr, idx); /* save the replacement string */ cmdstr_ptr += idx + 1; @@ -229,7 +256,7 @@ static int parse_regex_delim(const char *cmdstr, char **match, char **replace) if (idx == -1) { bb_error_msg_and_die(bad_format_in_subst); } - *replace = bb_xstrndup(cmdstr_ptr, idx); + *replace = copy_parsing_slashn(cmdstr_ptr, idx); return ((cmdstr_ptr - cmdstr) + idx); } @@ -248,94 +275,109 @@ static int get_address(char *my_str, int *linenum, regex_t ** regex) *linenum = -1; pos++; } else if (*my_str == '/' || *my_str == '\\') { - int next, idx_start = 1; + int next; char delimiter; + char *temp; - delimiter = '/'; - if (*my_str == '\\') { - idx_start++; - delimiter = *(++pos); - } + if (*my_str == '\\') delimiter = *(++pos); + else delimiter = '/'; next = index_of_next_unescaped_regexp_delim(delimiter, ++pos); - if (next == -1) { + if (next == -1) bb_error_msg_and_die("unterminated match expression"); - } - pos += next; - *pos = '\0'; - + + temp=copy_parsing_slashn(pos,next); *regex = (regex_t *) xmalloc(sizeof(regex_t)); - xregcomp(*regex, my_str + idx_start, REG_NEWLINE); - pos++; /* so it points to the next character after the last '/' */ + xregcomp(*regex, temp, REG_NEWLINE); + free(temp); + /* Move position to next character after last delimiter */ + pos+=(next+1); } return pos - my_str; } +/* Grab a filename. Whitespace at start is skipped, then goes to EOL. */ +static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr, char **retval) +{ + int start = 0, idx, hack=0; + + /* Skip whitespace, then grab filename to end of line */ + while (isspace(filecmdstr[start])) start++; + idx=start; + while(filecmdstr[idx] && filecmdstr[idx]!='\n') idx++; + /* If lines glued together, put backslash back. */ + if(filecmdstr[idx]=='\n') hack=1; + if(idx==start) bb_error_msg_and_die("Empty filename"); + *retval = bb_xstrndup(filecmdstr+start, idx-start+hack+1); + if(hack) *(idx+*retval)='\\'; + + return idx; +} + static int parse_subst_cmd(sed_cmd_t * const sed_cmd, const char *substr) { int cflags = 0; char *match; int idx = 0; - int j; /* - * the string that gets passed to this function should look like this: - * s/match/replace/gIp + * A substitution command should look something like this: + * s/match/replace/ #gIpw * || | ||| * mandatory optional - * - * (all three of the '/' slashes are mandatory) */ - idx = parse_regex_delim(substr, &match, &sed_cmd->replace); + idx = parse_regex_delim(substr, &match, &sed_cmd->string); /* determine the number of back references in the match string */ /* Note: we compute this here rather than in the do_subst_command() * function to save processor time, at the expense of a little more memory * (4 bits) per sed_cmd */ - for (j = 0; match[j]; j++) { - /* GNU/POSIX sed does not save more than nine backrefs */ - if (match[j] == '\\' && match[j + 1] == '(' - && sed_cmd->num_backrefs <= 9) - sed_cmd->num_backrefs++; - } - /* process the flags */ -#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY - idx++; -#else - /* GNU sed allows blanks before the flag, this can lead to an incosistent - * interpretation of 's/a/b/ g' as being either 's/a/b/g' or 's/a/b/;g'. - * which results in very different behaviour. - */ - while (substr[++idx]) -#endif + + sed_cmd->which_match=1; + while (substr[++idx]) { + /* Parse match number */ + if(isdigit(substr[idx])) { + if(match[0]!='^') { + /* Match 0 treated as all, multiple matches we take the last one. */ + char *pos=substr+idx; + sed_cmd->which_match=(unsigned short)strtol(substr+idx,&pos,10); + idx=pos-substr; + } + continue; + } switch (substr[idx]) { - case 'g': - if (match[0] != '^') { - sed_cmd->sub_g = 1; + /* Replace all occurrences */ + case 'g': + if (match[0] != '^') sed_cmd->which_match = 0; + break; + /* Print pattern space */ + case 'p': + sed_cmd->sub_p = 1; + break; + case 'w': + { + char *temp; + idx+=parse_file_cmd(sed_cmd,substr+idx,&temp); + + break; } - break; - /* Hmm, i dont see the I option mentioned in the standard */ - case 'I': - cflags |= REG_ICASE; - break; - case 'p': - sed_cmd->sub_p = 1; - break; -#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY - default: - /* any whitespace or semicolon trailing after a s/// is ok */ - if (strchr(semicolon_whitespace, substr[idx])) + /* Ignore case (gnu exension) */ + case 'I': + cflags |= REG_ICASE; + break; + /* Skip spaces */ + case ' ': + case '\t': + break; + case ';': + case '}': goto out; - bb_error_msg_and_die("bad option in substitution expression"); -#endif + default: + bb_error_msg_and_die("bad option in substitution expression"); } - -#ifndef CONFIG_FEATURE_SED_GNU_COMPATABILITY - idx++; -#else + } out: -#endif /* compile the match string into a regex */ if (*match != '\0') { /* If match is empty, we use last regex used at runtime */ @@ -347,166 +389,61 @@ out: return idx; } -static void replace_slash_n(char *string) -{ - char *dest; - - for (dest = string; *string; string++, dest++) { - if ((string[0] == '\\') && (string[1] == 'n')) { - *dest = '\n'; - string++; - } else { - *dest = *string; - } - } - *dest=0; -} - -static int parse_translate_cmd(sed_cmd_t * const sed_cmd, const char *cmdstr) -{ - char *match; - char *replace; - int idx; - int i; - - idx = parse_regex_delim(cmdstr, &match, &replace); - replace_slash_n(match); - replace_slash_n(replace); - sed_cmd->translate = xcalloc(1, (strlen(match) + 1) * 2); - for (i = 0; (match[i] != 0) && (replace[i] != 0); i++) { - sed_cmd->translate[i * 2] = match[i]; - sed_cmd->translate[(i * 2) + 1] = replace[i]; - } - return (idx + 1); -} - -static int parse_edit_cmd(sed_cmd_t * sed_cmd, const char *editstr) -{ - int i, j; - - /* - * the string that gets passed to this function should look like this: - * - * need one of these - * | - * | this backslash (immediately following the edit command) is mandatory - * | | - * [aic]\ - * TEXT1\ - * TEXT2\ - * TEXTN - * - * as soon as we hit a TEXT line that has no trailing '\', we're done. - * this means a command like: - * - * i\ - * INSERTME - * - * is a-ok. - * - */ - if ((*editstr != '\\') || ((editstr[1] != '\n') && (editstr[1] != '\r'))) { - bb_error_msg_and_die("bad format in edit expression"); - } - - /* store the edit line text */ - sed_cmd->editline = xmalloc(strlen(&editstr[2]) + 2); - for (i = 2, j = 0; - editstr[i] != '\0' && strchr("\r\n", editstr[i]) == NULL; i++, j++) { - if ((editstr[i] == '\\') && strchr("\n\r", editstr[i + 1]) != NULL) { - sed_cmd->editline[j] = '\n'; - i++; - } else - sed_cmd->editline[j] = editstr[i]; - } - - /* figure out if we need to add a newline */ - if (sed_cmd->editline[j - 1] != '\n') - sed_cmd->editline[j++] = '\n'; - - /* terminate string */ - sed_cmd->editline[j] = '\0'; - - return i; -} - - -static int parse_file_cmd(sed_cmd_t * sed_cmd, const char *filecmdstr) -{ - int idx = 0; - int filenamelen = 0; - - /* - * the string that gets passed to this function should look like this: - * '[ ]filename' - * | | - * | a filename - * | - * optional whitespace - - * re: the file to be read, the GNU manual says the following: "Note that - * if filename cannot be read, it is treated as if it were an empty file, - * without any error indication." Thus, all of the following commands are - * perfectly legal: - * - * sed -e '1r noexist' - * sed -e '1r ;' - * sed -e '1r' - */ - - /* the file command may be followed by whitespace; move past it. */ - while (isspace(filecmdstr[++idx])) {; - } - - /* the first non-whitespace we get is a filename. the filename ends when we - * hit a normal sed command terminator or end of string */ - filenamelen = strcspn(&filecmdstr[idx], semicolon_whitespace); - sed_cmd->filename = xmalloc(filenamelen + 1); - safe_strncpy(sed_cmd->filename, &filecmdstr[idx], filenamelen + 1); - return idx + filenamelen; -} - /* * Process the commands arguments */ -static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) +static char *parse_cmd_args(sed_cmd_t *sed_cmd, char *cmdstr) { /* handle (s)ubstitution command */ - if (sed_cmd->cmd == 's') { - cmdstr += parse_subst_cmd(sed_cmd, cmdstr); - } + if (sed_cmd->cmd == 's') cmdstr += parse_subst_cmd(sed_cmd, cmdstr); /* handle edit cmds: (a)ppend, (i)nsert, and (c)hange */ else if (strchr("aic", sed_cmd->cmd)) { if ((sed_cmd->end_line || sed_cmd->end_match) && sed_cmd->cmd != 'c') bb_error_msg_and_die ("only a beginning address can be specified for edit commands"); - cmdstr += parse_edit_cmd(sed_cmd, cmdstr); - } + while(isspace(*cmdstr)) cmdstr++; + sed_cmd->string = bb_xstrdup(cmdstr); + cmdstr += strlen(cmdstr); /* handle file cmds: (r)ead */ - else if (sed_cmd->cmd == 'r') { + } else if(strchr("rw", sed_cmd->cmd)) { if (sed_cmd->end_line || sed_cmd->end_match) bb_error_msg_and_die("Command only uses one address"); - cmdstr += parse_file_cmd(sed_cmd, cmdstr); - } + cmdstr += parse_file_cmd(sed_cmd, cmdstr, &sed_cmd->string); + if(sed_cmd->cmd=='w') + sed_cmd->file=bb_xfopen(sed_cmd->string,"w"); /* handle branch commands */ - else if (strchr(":bt", sed_cmd->cmd)) { + } else if (strchr(":bt", sed_cmd->cmd)) { int length; - cmdstr += strspn(cmdstr, " "); + while(isspace(*cmdstr)) cmdstr++; length = strcspn(cmdstr, semicolon_whitespace); if (length) { - sed_cmd->label = strndup(cmdstr, length); + sed_cmd->string = strndup(cmdstr, length); cmdstr += length; } } /* translation command */ else if (sed_cmd->cmd == 'y') { - cmdstr += parse_translate_cmd(sed_cmd, cmdstr); + char *match, *replace; + int i=cmdstr[0]; + + cmdstr+=parse_regex_delim(cmdstr, &match, &replace)+1; + /* \n already parsed, but \delimiter needs unescaping. */ + parse_escapes(match,match,strlen(match),i,i); + parse_escapes(replace,replace,strlen(replace),i,i); + + sed_cmd->string = xcalloc(1, (strlen(match) + 1) * 2); + for (i = 0; match[i] && replace[i]; i++) { + sed_cmd->string[i * 2] = match[i]; + sed_cmd->string[(i * 2) + 1] = replace[i]; + } + free(match); + free(replace); } /* if it wasnt a single-letter command that takes no arguments * then it must be an invalid command. */ - else if (strchr("dgGhHnNpPqx={}", sed_cmd->cmd) == 0) { + else if (strchr("dDgGhHlnNpPqx={}", sed_cmd->cmd) == 0) { bb_error_msg_and_die("Unsupported command %c", sed_cmd->cmd); } @@ -514,663 +451,595 @@ static char *parse_cmd_str(sed_cmd_t * sed_cmd, char *cmdstr) return (cmdstr); } -static char *add_cmd(char *cmdstr) -{ - sed_cmd_t *sed_cmd; - /* Skip over leading whitespace and semicolons */ - cmdstr += strspn(cmdstr, semicolon_whitespace); +/* Parse address+command sets, skipping comment lines. */ - /* if we ate the whole thing, that means there was just trailing - * whitespace or a final / no-op semicolon. either way, get out */ - if (*cmdstr == '\0') { - return (NULL); - } +void add_cmd(char *cmdstr) +{ + static char *add_cmd_line=NULL; + sed_cmd_t *sed_cmd; - /* if this is a comment, jump past it and keep going */ - if (*cmdstr == '#') { - /* "#n" is the same as using -n on the command line */ - if (cmdstr[1] == 'n') { - be_quiet++; - } - return (strpbrk(cmdstr, "\n\r")); + /* Append this line to any unfinished line from last time. */ + if(add_cmd_line) { + int lastlen=strlen(add_cmd_line); + char *temp=xmalloc(lastlen+strlen(cmdstr)+2); + + memcpy(temp,add_cmd_line,lastlen); + temp[lastlen]='\n'; + strcpy(temp+lastlen+1,cmdstr); + free(add_cmd_line); + cmdstr=add_cmd_line=temp; + } else add_cmd_line=NULL; + + /* If this line ends with backslash, request next line. */ + int temp=strlen(cmdstr); + if(temp && cmdstr[temp-1]=='\\') { + if(!add_cmd_line) add_cmd_line=strdup(cmdstr); + add_cmd_line[temp-1]=0; + return; } - /* parse the command - * format is: [addr][,addr]cmd - * |----||-----||-| - * part1 part2 part3 - */ + /* Loop parsing all commands in this line. */ + while(*cmdstr) { + /* Skip leading whitespace and semicolons */ + cmdstr += strspn(cmdstr, semicolon_whitespace); - sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); + /* If no more commands, exit. */ + if(!*cmdstr) break; - /* first part (if present) is an address: either a '$', a number or a /regex/ */ - cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); + /* if this is a comment, jump past it and keep going */ + if (*cmdstr == '#') { + /* "#n" is the same as using -n on the command line */ + if (cmdstr[1] == 'n') be_quiet++; + if(!(cmdstr=strpbrk(cmdstr, "\n\r"))) break; + continue; + } - /* second part (if present) will begin with a comma */ - if (*cmdstr == ',') { - int idx; + /* parse the command + * format is: [addr][,addr][!]cmd + * |----||-----||-| + * part1 part2 part3 + */ - cmdstr++; - idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); - if (idx == 0) { - bb_error_msg_and_die("get_address: no address found in string\n" - "\t(you probably didn't check the string you passed me)"); - } - cmdstr += idx; - } + sed_cmd = xcalloc(1, sizeof(sed_cmd_t)); - /* skip whitespace before the command */ - while (isspace(*cmdstr)) { - cmdstr++; - } + /* first part (if present) is an address: either a '$', a number or a /regex/ */ + cmdstr += get_address(cmdstr, &sed_cmd->beg_line, &sed_cmd->beg_match); - /* there my be the inversion flag between part2 and part3 */ - if (*cmdstr == '!') { - sed_cmd->invert = 1; - cmdstr++; + /* second part (if present) will begin with a comma */ + if (*cmdstr == ',') { + int idx; -#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY - /* According to the spec - * It is unspecified whether <blank>s can follow a '!' character, - * and conforming applications shall not follow a '!' character - * with <blank>s. - */ - /* skip whitespace before the command */ - while (isspace(*cmdstr)) { cmdstr++; + idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match); + if (!idx) bb_error_msg_and_die("get_address: no address found in string\n"); + cmdstr += idx; } -#endif - } - - /* last part (mandatory) will be a command */ - if (*cmdstr == '\0') - bb_error_msg_and_die("missing command"); - sed_cmd->cmd = *cmdstr; - cmdstr++; + /* skip whitespace before the command */ + while (isspace(*cmdstr)) cmdstr++; - cmdstr = parse_cmd_str(sed_cmd, cmdstr); + /* Check for inversion flag */ + if (*cmdstr == '!') { + sed_cmd->invert = 1; + cmdstr++; - /* Add the command to the command array */ - sed_cmd_tail->next = sed_cmd; - sed_cmd_tail = sed_cmd_tail->next; + /* skip whitespace before the command */ + while (isspace(*cmdstr)) cmdstr++; + } - return (cmdstr); -} + /* last part (mandatory) will be a command */ + if (!*cmdstr) bb_error_msg_and_die("missing command"); + sed_cmd->cmd = *(cmdstr++); + cmdstr = parse_cmd_args(sed_cmd, cmdstr); -static void add_cmd_str(const char *cmdstr) -{ - char *cmdstr_expanded = strdup(cmdstr); - char *cmdstr_ptr; - -#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE - cmdstr_ptr = cmdstr_expanded; - /* HACK: convert "\n" to match tranlated '\n' string */ - while ((cmdstr_ptr = strstr(cmdstr_ptr, "\\n")) != NULL) { - int length = strlen(cmdstr) + 2; - cmdstr_expanded = realloc(cmdstr_expanded, length); - cmdstr_ptr = strstr(cmdstr_expanded, "\\n"); - memmove(cmdstr_ptr + 1, cmdstr_ptr, strlen(cmdstr_ptr) + 1); - cmdstr_ptr[0] = '\\'; - cmdstr_ptr += 3; + /* Add the command to the command array */ + sed_cmd_tail->next = sed_cmd; + sed_cmd_tail = sed_cmd_tail->next; } -#endif - cmdstr_ptr = cmdstr_expanded; - do { - cmdstr_ptr = add_cmd(cmdstr_ptr); - } while (cmdstr_ptr && strlen(cmdstr_ptr)); - - free(cmdstr_expanded); -} - -static void load_cmd_file(const char *filename) -{ - FILE *cmdfile; - char *line; - char *nextline; - char *e; - - cmdfile = bb_xfopen(filename, "r"); - - while ((line = bb_get_line_from_file(cmdfile)) != NULL) { - /* if a line ends with '\' it needs the next line appended to it */ - while (((e = last_char_is(line, '\n')) != NULL) - && (e > line) && (e[-1] == '\\') - && ((nextline = bb_get_line_from_file(cmdfile)) != NULL)) { - line = xrealloc(line, (e - line) + 1 + strlen(nextline) + 1); - strcat(line, nextline); - free(nextline); - } - /* eat trailing newline (if any) --if I don't do this, edit commands - * (aic) will print an extra newline */ - chomp(line); - add_cmd_str(line); - free(line); + /* If we glued multiple lines together, free the memory. */ + if(add_cmd_line) { + free(add_cmd_line); + add_cmd_line=NULL; } } struct pipeline { - char *buf; - int idx; - int len; -}; + char *buf; /* Space to hold string */ + int idx; /* Space used */ + int len; /* Space allocated */ +} pipeline; -#define PIPE_MAGIC 0x7f #define PIPE_GROW 64 -void pipe_putc(struct pipeline *const pipeline, char c) +void pipe_putc(char c) { - if (pipeline->buf[pipeline->idx] == PIPE_MAGIC) { - pipeline->buf = xrealloc(pipeline->buf, pipeline->len + PIPE_GROW); - memset(pipeline->buf + pipeline->len, 0, PIPE_GROW); - pipeline->len += PIPE_GROW; - pipeline->buf[pipeline->len - 1] = PIPE_MAGIC; + if(pipeline.idx==pipeline.len) { + pipeline.buf = xrealloc(pipeline.buf, pipeline.len + PIPE_GROW); + pipeline.len+=PIPE_GROW; } - pipeline->buf[pipeline->idx++] = (c); + pipeline.buf[pipeline.idx++] = (c); } -#define pipeputc(c) pipe_putc(pipeline, c) - -static void print_subst_w_backrefs(const char *line, const char *replace, - regmatch_t * regmatch, struct pipeline *const pipeline, int matches) +static void do_subst_w_backrefs(const char *line, const char *replace) { - int i; + int i,j; /* go through the replacement string */ for (i = 0; replace[i]; i++) { /* if we find a backreference (\1, \2, etc.) print the backref'ed * text */ - if (replace[i] == '\\' && isdigit(replace[i + 1])) { - int j; - char tmpstr[2]; - int backref; - - ++i; /* i now indexes the backref number, instead of the leading slash */ - tmpstr[0] = replace[i]; - tmpstr[1] = 0; - backref = atoi(tmpstr); + if (replace[i] == '\\' && replace[i+1]>0 && replace[i+1]<=9) { + int backref=replace[++i]-'0'; + /* print out the text held in regmatch[backref] */ - if (backref <= matches && regmatch[backref].rm_so != -1) - for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; - j++) - pipeputc(line[j]); + if(regmatch[backref].rm_so != -1) + for (j = regmatch[backref].rm_so; j < regmatch[backref].rm_eo; j++) + pipe_putc(line[j]); } /* if we find a backslash escaped character, print the character */ - else if (replace[i] == '\\') { - ++i; - pipeputc(replace[i]); - } - - /* if we find an unescaped '&' print out the whole matched text. - * fortunately, regmatch[0] contains the indicies to the whole matched - * expression (kinda seems like it was designed for just such a - * purpose...) */ - else if (replace[i] == '&') { - int j; + else if (replace[i] == '\\') pipe_putc(replace[++i]); + /* if we find an unescaped '&' print out the whole matched text. */ + else if (replace[i] == '&') for (j = regmatch[0].rm_so; j < regmatch[0].rm_eo; j++) - pipeputc(line[j]); - } - /* nothing special, just print this char of the replacement string to stdout */ - else - pipeputc(replace[i]); + pipe_putc(line[j]); + /* Otherwise just output the character. */ + else pipe_putc(replace[i]); } } static int do_subst_command(sed_cmd_t * sed_cmd, char **line) { - char *hackline = *line; - struct pipeline thepipe = { NULL, 0, 0 }; - struct pipeline *const pipeline = &thepipe; + char *oldline = *line; int altered = 0; - int result; - regmatch_t *regmatch = NULL; + int match_count=0; regex_t *current_regex; + /* Handle empty regex. */ if (sed_cmd->sub_match == NULL) { current_regex = previous_regex_ptr; - } else { - previous_regex_ptr = current_regex = sed_cmd->sub_match; - } - result = regexec(current_regex, hackline, 0, NULL, 0); + if(!current_regex) + bb_error_msg_and_die("No previous regexp."); + } else previous_regex_ptr = current_regex = sed_cmd->sub_match; - /* we only proceed if the substitution 'search' expression matches */ - if (result == REG_NOMATCH) { + /* Find the first match */ + if(REG_NOMATCH==regexec(current_regex, oldline, 10, regmatch, 0)) return 0; - } - /* whaddaya know, it matched. get the number of back references */ - regmatch = xmalloc(sizeof(regmatch_t) * (sed_cmd->num_backrefs + 1)); - - /* allocate more PIPE_GROW bytes - if replaced string is larger than original */ - thepipe.len = strlen(hackline) + PIPE_GROW; - thepipe.buf = xcalloc(1, thepipe.len); - /* buffer magic */ - thepipe.buf[thepipe.len - 1] = PIPE_MAGIC; - - /* and now, as long as we've got a line to try matching and if we can match - * the search string, we make substitutions */ - while ((*hackline || !altered) - && (regexec(current_regex, hackline, sed_cmd->num_backrefs + 1, - regmatch, 0) != REG_NOMATCH)) { + /* Initialize temporary output buffer. */ + pipeline.buf=xmalloc(PIPE_GROW); + pipeline.len=PIPE_GROW; + pipeline.idx=0; + + /* Now loop through, substituting for matches */ + do { int i; + match_count++; + + /* If we aren't interested in this match, output old line to + end of match and continue */ + if(sed_cmd->which_match && sed_cmd->which_match!=match_count) { + for(i=0;i<regmatch[0].rm_eo;i++) + pipe_putc(oldline[i]); + continue; + } + /* print everything before the match */ - for (i = 0; i < regmatch[0].rm_so; i++) - pipeputc(hackline[i]); + for (i = 0; i < regmatch[0].rm_so; i++) pipe_putc(oldline[i]); /* then print the substitution string */ - print_subst_w_backrefs(hackline, sed_cmd->replace, regmatch, pipeline, - sed_cmd->num_backrefs); + do_subst_w_backrefs(oldline, sed_cmd->string); /* advance past the match */ - hackline += regmatch[0].rm_eo; + oldline += regmatch[0].rm_eo; /* flag that something has changed */ altered++; /* if we're not doing this globally, get out now */ - if (!sed_cmd->sub_g) { - break; - } - } - for (; *hackline; hackline++) - pipeputc(*hackline); - if (thepipe.buf[thepipe.idx] == PIPE_MAGIC) - thepipe.buf[thepipe.idx] = 0; + if (sed_cmd->which_match) break; + } while (*oldline && (regexec(current_regex, oldline, 10, regmatch, 0) != REG_NOMATCH)); - /* cleanup */ - free(regmatch); + /* Copy rest of string into output pipeline */ + + while(*oldline) pipe_putc(*(oldline++)); + pipe_putc(0); free(*line); - *line = thepipe.buf; + *line = pipeline.buf; return altered; } +/* Set command pointer to point to this label. (Does not handle null label.) */ static sed_cmd_t *branch_to(const char *label) { sed_cmd_t *sed_cmd; for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { - if ((sed_cmd->cmd == ':') && (sed_cmd->label) && (strcmp(sed_cmd->label, label) == 0)) { + if ((sed_cmd->cmd == ':') && (sed_cmd->string) && (strcmp(sed_cmd->string, label) == 0)) { return (sed_cmd); } } bb_error_msg_and_die("Can't find label for jump to `%s'", label); } -static void process_file(FILE * file) +/* Append copy of string to append buffer */ +static void append(char *s) { - char *pattern_space; /* Posix requires it be able to hold at least 8192 bytes */ - char *hold_space = NULL; /* Posix requires it be able to hold at least 8192 bytes */ - static int linenum = 0; /* GNU sed does not restart counting lines at EOF */ - int altered; - int force_print; - - pattern_space = bb_get_chomped_line_from_file(file); - if (pattern_space == NULL) { - return; + struct append_list *temp=calloc(1,sizeof(struct append_list)); + + if(append_head) + append_tail=(append_tail->next=temp); + else append_head=append_tail=temp; + temp->string=strdup(s); +} + +static void flush_append(void) +{ + /* Output appended lines. */ + while(append_head) { + puts(append_head->string); + append_tail=append_head->next; + free(append_head->string); + free(append_head); + append_head=append_tail; + } + append_head=append_tail=NULL; +} + +/* Get next line of input, flushing append buffer and noting if we hit EOF + * without a newline on the last line. + */ +static char *get_next_line(FILE * file, int *no_newline) +{ + char *temp; + int len; + + flush_append(); + temp=bb_get_line_from_file(file); + if(temp) { + len=strlen(temp); + if(len && temp[len-1]=='\n') temp[len-1]=0; + else *no_newline=1; } - + + return temp; +} + +/* Output line of text. missing_newline means the last line output did not + end with a newline. no_newline means this line does not end with a + newline. */ + +static int puts_maybe_newline(char *s, FILE *file, int missing_newline, int no_newline) +{ + if(missing_newline) fputc('\n',file); + fputs(s,file); + if(!no_newline) fputc('\n',file); + + return no_newline; +} + +#define sed_puts(s,n) missing_newline=puts_maybe_newline(s,stdout,missing_newline,n) + +static void process_file(FILE * file) +{ + char *pattern_space, *next_line, *hold_space=NULL; + static int linenum = 0, missing_newline=0; + int no_newline,next_no_newline=0; + + next_line = get_next_line(file,&next_no_newline); + /* go through every line in the file */ - do { - char *next_line; + for(;;) { sed_cmd_t *sed_cmd; - int substituted = 0; - /* This enables whole blocks of commands to be mask'ed out if the lead address doesnt match */ - int block_mask = 1; + int substituted=0; + + /* Advance to next line. Stop if out of lines. */ + if(!(pattern_space=next_line)) break; + no_newline=next_no_newline; /* Read one line in advance so we can act on the last line, the '$' address */ - next_line = bb_get_chomped_line_from_file(file); + next_line = get_next_line(file,&next_no_newline); linenum++; - altered = 0; - force_print = 0; - +restart: /* for every line, go through all the commands */ for (sed_cmd = sed_cmd_head.next; sed_cmd; sed_cmd = sed_cmd->next) { - int deleted = 0; - - /* - * entry point into sedding... - */ - int matched = ( - /* no range necessary */ - (sed_cmd->beg_line == 0 && sed_cmd->end_line == 0 - && sed_cmd->beg_match == NULL - && sed_cmd->end_match == NULL) || - /* this line number is the first address we're looking for */ - (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) || - /* this line matches our first address regex */ - (sed_cmd->beg_match - && (regexec(sed_cmd->beg_match, pattern_space, 0, NULL, - 0) == 0)) || - /* we are currently within the beginning & ending address range */ - sed_cmd->still_in_range || ((sed_cmd->beg_line == -1) - && (next_line == NULL)) + int matched; + + /* Determine if this command matches this line: */ + + /* Are we continuing a previous multi-line match? */ + + sed_cmd->in_match = sed_cmd->in_match + + /* Or is no range necessary? */ + || (!sed_cmd->beg_line && !sed_cmd->end_line + && !sed_cmd->beg_match && !sed_cmd->end_match) + + /* Or did we match the start of a numerical range? */ + || (sed_cmd->beg_line > 0 && (sed_cmd->beg_line == linenum)) + + /* Or does this line match our begin address regex? */ + || (sed_cmd->beg_match && + !regexec(sed_cmd->beg_match, pattern_space, 0, NULL, 0)) + + /* Or did we match last line of input? */ + || (sed_cmd->beg_line == -1 && next_line == NULL); + + /* Snapshot the value */ + + matched = sed_cmd->in_match; + + /* Is this line the end of the current match? */ + + if(matched) { + sed_cmd->in_match = !( + /* has the ending line come, or is this a single address command? */ + (sed_cmd->end_line ? + sed_cmd->end_line==-1 ? + !next_line + : sed_cmd->end_line<=linenum + : !sed_cmd->end_match) + /* or does this line matches our last address regex */ + || (sed_cmd->end_match && (regexec(sed_cmd->end_match, pattern_space, 0, NULL, 0) == 0)) ); + } + + /* Skip blocks of commands we didn't match. */ if (sed_cmd->cmd == '{') { - block_mask = block_mask & matched; + if(sed_cmd->invert ? matched : !matched) + while(sed_cmd && sed_cmd->cmd!='}') sed_cmd=sed_cmd->next; + if(!sed_cmd) bb_error_msg_and_die("Unterminated {"); + continue; } -// matched &= block_mask; - if (sed_cmd->invert ^ (matched & block_mask)) { - /* Update last used regex incase a blank substitute BRE is found */ + /* Okay, so did this line match? */ + if (sed_cmd->invert ? !matched : matched) { + /* Update last used regex in case a blank substitute BRE is found */ if (sed_cmd->beg_match) { previous_regex_ptr = sed_cmd->beg_match; } - /* - * actual sedding - */ + /* actual sedding */ switch (sed_cmd->cmd) { - case '=': - printf("%d\n", linenum); - break; - case 'P':{ - /* Write the current pattern space upto the first newline */ - char *tmp = strchr(pattern_space, '\n'); - - if (tmp) { - *tmp = '\0'; - puts(pattern_space); - *tmp = '\n'; + + /* Print line number */ + case '=': + printf("%d\n", linenum); break; - } - /* Fall Through */ - } - case 'p': /* Write the current pattern space to output */ - puts(pattern_space); - break; - case 'd': - altered++; - deleted = 1; - force_print = 0; - break; - - case 's': - - /* - * Some special cases for 's' printing to make it compliant with - * GNU sed printing behavior (aka "The -n | s///p Matrix"): - * - * -n ONLY = never print anything regardless of any successful - * substitution - * - * s///p ONLY = always print successful substitutions, even if - * the pattern_space is going to be printed anyway (pattern_space - * will be printed twice). - * - * -n AND s///p = print ONLY a successful substitution ONE TIME; - * no other lines are printed - this is the reason why the 'p' - * flag exists in the first place. - */ - -#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE - /* HACK: escape newlines twice so regex can match them */ - { - int offset = 0; - char *tmp = strchr(pattern_space + offset, '\n'); - while ((tmp = strchr(pattern_space + offset, '\n')) != NULL) { - offset = tmp - pattern_space; - pattern_space = xrealloc(pattern_space, strlen(pattern_space) + 2); - tmp = pattern_space + offset; - memmove(tmp + 1, tmp, strlen(tmp) + 1); - tmp[0] = '\\'; - tmp[1] = 'n'; - offset += 2; - } - } -#endif - /* we print the pattern_space once, unless we were told to be quiet */ - substituted |= do_subst_command(sed_cmd, &pattern_space); -#ifdef CONFIG_FEATURE_SED_EMBEDED_NEWLINE - /* undo HACK: escape newlines twice so regex can match them */ + + /* Write the current pattern space up to the first newline */ + case 'P': { - char *tmp = pattern_space; + char *tmp = strchr(pattern_space, '\n'); - while ((tmp = strstr(tmp, "\\n")) != NULL) { - memmove(tmp, tmp + 1, strlen(tmp + 1) + 1); - tmp[0] = '\n'; + if (tmp) { + *tmp = '\0'; + sed_puts(pattern_space,1); + *tmp = '\n'; + break; } + /* Fall Through */ } -#endif - if (!be_quiet && substituted && ((sed_cmd->next == NULL) - || (sed_cmd->next->cmd != 's'))) { - force_print = 1; - } - /* we also print the line if we were given the 'p' flag - * (this is quite possibly the second printing) */ - if ((sed_cmd->sub_p) && (altered || substituted)) { - puts(pattern_space); + + /* Write the current pattern space to output */ + case 'p': + sed_puts(pattern_space,no_newline); + break; + /* Delete up through first newline */ + case 'D': + { + char *tmp = strchr(pattern_space,'\n'); + + if(tmp) { + tmp=bb_xstrdup(tmp+1); + free(pattern_space); + pattern_space=tmp; + goto restart; + } } - break; - case 'a': - puts(pattern_space); - fputs(sed_cmd->editline, stdout); - altered++; - break; - - case 'i': - fputs(sed_cmd->editline, stdout); - break; - - case 'c': - /* single-address case */ - if ((sed_cmd->end_match == NULL && sed_cmd->end_line == 0) - /* multi-address case */ - /* - matching text */ - || (sed_cmd->end_match - && (regexec(sed_cmd->end_match, pattern_space, 0, - NULL, 0) == 0)) - /* - matching line numbers */ - || (sed_cmd->end_line > 0 - && sed_cmd->end_line == linenum)) { - fputs(sed_cmd->editline, stdout); + /* discard this line. */ + case 'd': + goto discard_line; + + /* Substitute with regex */ + case 's': + if(do_subst_command(sed_cmd, &pattern_space)) { + substituted|=1; + + /* handle p option */ + if(sed_cmd->sub_p) + sed_puts(pattern_space,no_newline); + /* handle w option */ + if(sed_cmd->file) + sed_cmd->no_newline=puts_maybe_newline(pattern_space, sed_cmd->file, sed_cmd->no_newline, no_newline); + + } + break; + + /* Append line to linked list to be printed later */ + case 'a': + { + append(sed_cmd->string); + break; } - altered++; - break; + /* Insert text before this line */ + case 'i': + sed_puts(sed_cmd->string,1); + break; + + /* Cut and paste text (replace) */ + case 'c': + /* Only triggers on last line of a matching range. */ + if (!sed_cmd->in_match) sed_puts(sed_cmd->string,1); + goto discard_line; - case 'r':{ - FILE *outfile; + /* Read file, append contents to output */ + case 'r': + { + FILE *outfile; - outfile = fopen(sed_cmd->filename, "r"); - if (outfile) { - char *line; + outfile = fopen(sed_cmd->string, "r"); + if (outfile) { + char *line; - while ((line = - bb_get_chomped_line_from_file(outfile)) != - NULL) { - pattern_space = - xrealloc(pattern_space, - strlen(line) + strlen(pattern_space) + 2); - strcat(pattern_space, "\n"); - strcat(pattern_space, line); + while ((line = bb_get_chomped_line_from_file(outfile)) + != NULL) + append(line); + bb_xprint_and_close_file(outfile); } - bb_xprint_and_close_file(outfile); - } - } - break; - case 'q': /* Branch to end of script and quit */ - deleted = 1; - /* Exit the outer while loop */ - free(next_line); - next_line = NULL; - break; - case 'n': /* Read next line from input */ - if (!be_quiet) { - puts(pattern_space); - } - if (next_line) { - free(pattern_space); - pattern_space = next_line; - next_line = bb_get_chomped_line_from_file(file); - linenum++; - } else { - /* Jump to end of script and exit */ - deleted = 1; - next_line = NULL; - } - break; - case 'N': /* Append the next line to the current line */ - if (next_line == NULL) { - /* Jump to end of script and exit */ - deleted = 1; -#ifdef CONFIG_FEATURE_SED_GNU_COMPATABILITY - /* GNU sed will add the newline character - * The GNU sed info page labels this as a bug that wont be fixed - */ - next_line = calloc(1,1); -#else - next_line = NULL; break; -#endif } - pattern_space = realloc(pattern_space, strlen(pattern_space) + strlen(next_line) + 2); - strcat(pattern_space, "\n"); - strcat(pattern_space, next_line); - next_line = bb_get_chomped_line_from_file(file); - linenum++; - break; - case 't': - if (substituted) - /* Fall through */ - case 'b': + + /* Write pattern space to file. */ + case 'w': + sed_cmd->no_newline=puts_maybe_newline(pattern_space,sed_cmd->file, sed_cmd->no_newline,no_newline); + break; + + /* Read next line from input */ + case 'n': + if (!be_quiet) + sed_puts(pattern_space,no_newline); + if (next_line) { + free(pattern_space); + pattern_space = next_line; + no_newline=next_no_newline; + next_line = get_next_line(file,&next_no_newline); + linenum++; + break; + } + /* fall through */ + + /* Quit. End of script, end of input. */ + case 'q': + /* Exit the outer while loop */ + free(next_line); + next_line = NULL; + goto discard_commands; + + /* Append the next line to the current line */ + case 'N': { - if (sed_cmd->label == NULL) { - /* Jump to end of script */ - deleted = 1; + /* If no next line, jump to end of script and exit. */ + if (next_line == NULL) { + /* Jump to end of script and exit */ + free(next_line); + next_line = NULL; + goto discard_line; + /* append next_line, read new next_line. */ } else { - sed_cmd = branch_to(sed_cmd->label); + int len=strlen(pattern_space); + + pattern_space = realloc(pattern_space, len + strlen(next_line) + 2); + pattern_space[len]='\n'; + strcpy(pattern_space+len+1, next_line); + no_newline=next_no_newline; + next_line = get_next_line(file,&next_no_newline); + linenum++; } - /* Reset the substitution flag */ - substituted = 0; + break; } - break; - case 'y':{ - int i; - for (i = 0; pattern_space[i] != 0; i++) { - int j; + /* Test if substition worked, branch if so. */ + case 't': + if (!substituted) break; + substituted=0; + /* Fall through */ + /* Branch to label */ + case 'b': + if (!sed_cmd->string) goto discard_commands; + else sed_cmd = branch_to(sed_cmd->string); + break; + /* Transliterate characters */ + case 'y': + { + int i; + + for (i = 0; pattern_space[i]; i++) { + int j; - for (j = 0; sed_cmd->translate[j]; j += 2) { - if (pattern_space[i] == sed_cmd->translate[j]) { - pattern_space[i] = sed_cmd->translate[j + 1]; + for (j = 0; sed_cmd->string[j]; j += 2) { + if (pattern_space[i] == sed_cmd->string[j]) { + pattern_space[i] = sed_cmd->string[j + 1]; + } } } - } - } - break; - case 'g': /* Replace pattern space with hold space */ - free(pattern_space); - if (hold_space) { - pattern_space = strdup(hold_space); - } - break; - case 'G': { /* Append newline and hold space to pattern space */ - int pattern_space_size = 2; - int hold_space_size = 0; - if (pattern_space) { - pattern_space_size += strlen(pattern_space); - } - if (hold_space) { - hold_space_size = strlen(hold_space); + break; } - pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); - if (pattern_space_size == 2) { - strcpy(pattern_space, "\n"); - } else { + case 'g': /* Replace pattern space with hold space */ + free(pattern_space); + if (hold_space) { + pattern_space = strdup(hold_space); + no_newline=0; + } + break; + case 'G': /* Append newline and hold space to pattern space */ + { + int pattern_space_size = 2; + int hold_space_size = 0; + + if (pattern_space) + pattern_space_size += strlen(pattern_space); + if (hold_space) hold_space_size = strlen(hold_space); + pattern_space = xrealloc(pattern_space, pattern_space_size + hold_space_size); + if (pattern_space_size == 2) pattern_space[0]=0; strcat(pattern_space, "\n"); + if (hold_space) strcat(pattern_space, hold_space); + no_newline=0; + + break; } - if (hold_space) { - strcat(pattern_space, hold_space); - } - break; - } - case 'h': /* Replace hold space with pattern space */ - free(hold_space); - hold_space = strdup(pattern_space); - break; - case 'H': { /* Append newline and pattern space to hold space */ - int hold_space_size = 2; - int pattern_space_size = 0; - - if (hold_space) { - hold_space_size += strlen(hold_space); - } - if (pattern_space) { - pattern_space_size = strlen(pattern_space); - } - hold_space = xrealloc(hold_space, hold_space_size + pattern_space_size); + case 'h': /* Replace hold space with pattern space */ + free(hold_space); + hold_space = strdup(pattern_space); + break; + case 'H': /* Append newline and pattern space to hold space */ + { + int hold_space_size = 2; + int pattern_space_size = 0; - if (hold_space_size == 2) { - strcpy(hold_space, "\n"); - } else { + if (hold_space) hold_space_size += strlen(hold_space); + if (pattern_space) + pattern_space_size = strlen(pattern_space); + hold_space = xrealloc(hold_space, + hold_space_size + pattern_space_size); + + if (hold_space_size == 2) hold_space[0]=0; strcat(hold_space, "\n"); + if (pattern_space) strcat(hold_space, pattern_space); + + break; } - if (pattern_space) { - strcat(hold_space, pattern_space); + case 'x': /* Exchange hold and pattern space */ + { + char *tmp = pattern_space; + pattern_space = hold_space; + no_newline=0; + hold_space = tmp; + break; } - break; - } - case 'x':{ - /* Swap hold and pattern space */ - char *tmp = pattern_space; - pattern_space = hold_space; - hold_space = tmp; - break; - } } } - - /* - * exit point from sedding... - */ - if (matched) { - if ( - /* this is a single-address command or... */ - (sed_cmd->end_line == 0 && sed_cmd->end_match == NULL) - /* If only one address */ - /* we were in the middle of our address range (this - * isn't the first time through) and.. */ - || ((sed_cmd->still_in_range == 1) - /* this line number is the last address we're looking for or... */ - && ((sed_cmd->end_line > 0 - && (sed_cmd->end_line == linenum)) - /* this line matches our last address regex */ - || (sed_cmd->end_match - && (regexec(sed_cmd->end_match, pattern_space, - 0, NULL, 0) == 0))))) { - /* we're out of our address range */ - sed_cmd->still_in_range = 0; - } else { - /* didn't hit the exit? then we're still in the middle of an address range */ - sed_cmd->still_in_range = 1; - } - } - - if (sed_cmd->cmd == '}') { - block_mask = 1; - } - - if (deleted) - break; - } - /* we will print the line unless we were told to be quiet or if the - * line was altered (via a 'd'elete or 's'ubstitution), in which case - * the altered line was already printed */ - if ((!be_quiet && !altered && !substituted) || force_print) { - puts(pattern_space); - } + /* + * exit point from sedding... + */ +discard_commands: + /* we will print the line unless we were told to be quiet ('-n') + or if the line was suppressed (ala 'd'elete) */ + if (!be_quiet) sed_puts(pattern_space,no_newline); + + /* Delete and such jump here. */ +discard_line: + flush_append(); free(pattern_space); - pattern_space = next_line; - } while (pattern_space); + } } extern int sed_main(int argc, char **argv) @@ -1179,7 +1048,7 @@ extern int sed_main(int argc, char **argv) #ifdef CONFIG_FEATURE_CLEAN_UP /* destroy command strings on exit */ - if (atexit(destroy_cmd_strs) == -1) + if (atexit(free_and_close_stuff) == -1) bb_perror_msg_and_die("atexit"); #endif @@ -1189,17 +1058,46 @@ extern int sed_main(int argc, char **argv) case 'n': be_quiet++; break; - case 'e':{ - add_cmd_str(optarg); + case 'e': + { + int go=1; + char *temp=bb_xstrdup(optarg),*temp2=temp; + + /* It is possible to have a command line argument with embedded + newlines. This counts as a multi-line argument. */ + + while(go) { + int len=strcspn(temp2,"\n"); + if(!temp2[len]) go=0; + else temp2[len]=0; + add_cmd(temp2); + temp2+=len+1; + } + free(temp); break; } case 'f': - load_cmd_file(optarg); + { + FILE *cmdfile; + char *line; + + cmdfile = bb_xfopen(optarg, "r"); + + while ((line = bb_get_chomped_line_from_file(cmdfile)) + != NULL) { + add_cmd(line); + free(line); + } + bb_xprint_and_close_file(cmdfile); + break; + } default: bb_show_usage(); } } + /* Flush any unfinished commands. */ + add_cmd(""); /* if we didn't get a pattern from a -e and no command file was specified, * argv[optind] should be the pattern. no pattern, no worky */ @@ -1207,7 +1105,7 @@ extern int sed_main(int argc, char **argv) if (argv[optind] == NULL) bb_show_usage(); else - add_cmd_str(argv[optind++]); + add_cmd(argv[optind++]); } /* argv[(optind)..(argc-1)] should be names of file to process. If no diff --git a/include/libbb.h b/include/libbb.h index 2bb5ce02d..eb6841d33 100644 --- a/include/libbb.h +++ b/include/libbb.h @@ -28,6 +28,7 @@ #include <stdarg.h> #include <sys/stat.h> #include <sys/types.h> +#include <regex.h> #include <termios.h> #include <netdb.h> @@ -468,5 +469,5 @@ extern void print_login_prompt(void); extern void vfork_daemon_rexec(int argc, char **argv, char *foreground_opt); extern void get_terminal_width_height(int fd, int *width, int *height); extern unsigned long get_ug_id(const char *s, long (*my_getxxnam)(const char *)); - +extern void xregcomp(regex_t *preg, const char *regex, int cflags); #endif /* __LIBCONFIG_H__ */ |