diff options
author | Rob Landley <rob@landley.net> | 2014-12-21 01:54:54 -0600 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2014-12-21 01:54:54 -0600 |
commit | 1a1e0a9d325dd1ca7461a8e8203dd47bab6a8bc1 (patch) | |
tree | 6fc7da5f3bbd3ad9ead0fd52c2dc4d67d5a825cc /toys/pending | |
parent | 32cd2b770fe3ac8c419a29156162f3a037cf47a3 (diff) | |
download | toybox-1a1e0a9d325dd1ca7461a8e8203dd47bab6a8bc1.tar.gz |
Promote sed to posix.
Diffstat (limited to 'toys/pending')
-rw-r--r-- | toys/pending/sed.c | 1002 |
1 files changed, 0 insertions, 1002 deletions
diff --git a/toys/pending/sed.c b/toys/pending/sed.c deleted file mode 100644 index a5c24454..00000000 --- a/toys/pending/sed.c +++ /dev/null @@ -1,1002 +0,0 @@ -/* sed.c - stream editor. Thing that does s/// and other stuff. - * - * Copyright 2014 Rob Landley <rob@landley.net> - * - * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html - * - * TODO: lines > 2G could signed int wrap length counters. Not just getline() - * but N and s/// - -USE_SED(NEWTOY(sed, "(version)e*f*inr", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE)) - -config SED - bool "sed" - default n - help - usage: sed [-inr] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...] - - Stream editor. Apply one or more editing SCRIPTs to each line of input - (from FILE or stdin) producing output (by default to stdout). - - -e add SCRIPT to list - -f add contents of SCRIPT_FILE to list - -i Edit each file in place. - -n No default output. (Use the p command to output matched lines.) - -r Use extended regular expression syntax. - -s Treat input files separately (implied by -i) - - A SCRIPT is a series of one or more COMMANDs separated by newlines or - semicolons. All -e SCRIPTs are concatenated together as if separated - by newlines, followed by all lines from -f SCRIPT_FILEs, in order. - If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT. - - Each COMMAND may be preceded by an address which limits the command to - apply only to the specified line(s). Commands without an address apply to - every line. Addresses are of the form: - - [ADDRESS[,ADDRESS]]COMMAND - - The ADDRESS may be a decimal line number (starting at 1), a /regular - expression/ within a pair of forward slashes, or the character "$" which - matches the last line of input. (In -s or -i mode this matches the last - line of each file, otherwise just the last line of the last file.) 
A single - address matches one line, a pair of comma separated addresses match - everything from the first address to the second address (inclusive). If - both addresses are regular expressions, more than one range of lines in - each file can match. - - REGULAR EXPRESSIONS in sed are started and ended by the same character - (traditionally / but anything except a backslash or a newline works). - Backslashes may be used to escape the delimiter if it occurs in the - regex, and for the usual printf escapes (\abcefnrtv and octal, hex, - and unicode). An empty regex repeats the previous one. ADDRESS regexes - (above) require the first delimiter to be escaped with a backslash when - it isn't a forward slash (to distinguish it from the COMMANDs below). - - Sed mostly operates on individual lines one at a time. It reads each line, - processes it, and either writes it to the output or discards it before - reading the next line. Sed can remember one additional line in a separate - buffer (using the h, H, g, G, and x commands), and can read the next line - of input early (using the n and N commands), but other than that command - scripts operate on individual lines of text. - - Each COMMAND starts with a single character. The following commands take - no arguments: - - { Start a new command block, continuing until a corresponding "}". - Command blocks may nest. If the block has an address, commands within - the block are only run for lines within the block's address range. 
- - } End command block (this command cannot have an address) - - d Delete this line and move on to the next one - (ignores remaining COMMANDs) - - D Delete one line of input and restart command SCRIPT (same as "d" - unless you've glued lines together with "N" or similar) - - g Get remembered line (overwriting current line) - - G Get remembered line (appending to current line) - - h Remember this line (overwriting remembered line) - - H Remember this line (appending to remembered line, if any) - - l Print line, escaping \abfrtv (but not newline), octal escaping other - nonprintable characters, wrapping lines to terminal width with a - backslash, and appending $ to actual end of line. - - n Print default output and read next line, replacing current line - (If no next line available, quit processing script) - - N Append next line of input to this line, separated by a newline - (This advances the line counter for address matching and "=", if no - next line available quit processing script without default output) - - p Print this line - - P Print this line up to first newline (from "N") - - q Quit (print default output, no more commands processed or lines read) - - x Exchange this line with remembered line (overwrite in both directions) - - = Print the current line number (followed by a newline) - - The following commands (may) take an argument. The "text" arguments (to - the "a", "b", and "c" commands) may end with an unescaped "\" to append - the next line (for which leading whitespace is not skipped), and also - treat ";" as a literal character (use "\;" instead). - - a [text] Append text to output before attempting to read next line - - b [label] Branch, jumps to :label (or with no label, to end of SCRIPT) - - c [text] Delete line, output text at end of matching address range - (ignores remaining COMMANDs) - - i [text] Print text - - r [file] Append contents of file to output before attempting to read - next line. 
- - s/S/R/F Search for regex S, replace matched text with R using flags F. - The first character after the "s" (anything but newline or - backslash) is the delimiter, escape with \ to use normally. - - The replacement text may contain "&" to substitute the matched - text (escape it with backslash for a literal &), or \1 through - \9 to substitute a parenthetical subexpression in the regex. - You can also use the normal backslash escapes such as \n and - a backslash at the end of the line appends the next line. - - The flags are: - - [0-9] A number, substitute only that occurrence of pattern - g Global, substitute all occurrences of pattern - i Ignore case when matching - p Print the line if match was found and replaced - w [file] Write (append) line to file if match replaced - - t [label] Test, jump to :label only if an "s" command found a match in - this line since last test (replacing with same text counts) - - T [label] Test false, jump only if "s" hasn't found a match. - - w [file] Write (append) line to file - - y/old/new/ Change each character in 'old' to corresponding character - in 'new' (with standard backslash escapes, delimiter can be - any repeated character except \ or \n) - - : [label] Labeled target for jump commands - - # Comment, ignore rest of this line of SCRIPT - - Deviations from posix: allow extended regular expressions with -r, - editing in place with -i, separate with -s, printf escapes in text, line - continuations, semicolons after all commands, 2-address anywhere an - address is allowed, "T" command, multiline continuations for [abc], - \; to end [abc] argument before end of line. 
-*/ - -#define FOR_sed -#include "toys.h" - -GLOBALS( - struct arg_list *f; - struct arg_list *e; - - // processed pattern list - struct double_list *pattern; - - char *nextline, *remember; - void *restart, *lastregex; - long nextlen, rememberlen, count; - int fdout, noeol; - unsigned xx; -) - -struct step { - struct step *next, *prev; - - // Begin and end of each match - long lmatch[2]; - int rmatch[2], arg1, arg2, w; // offsets because remalloc() - unsigned not, hit, sflags; - char c; // action -}; - -// Write out line with potential embedded NUL, handling eol/noeol -static int emit(char *line, long len, int eol) -{ - int l, old = line[len]; - - if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1; - if (eol) line[len++] = '\n'; - if (!len) return 0; - TT.noeol = len && !eol; - l = writeall(TT.fdout, line, len); - if (eol) line[len-1] = old; - if (l != len) { - perror_msg("short write"); - - return 1; - } - - return 0; -} - -// Do regex matching handling embedded NUL bytes in string. Note that -// neither the pattern nor the match can currently include NUL bytes -// (even with wildcards) and string must be null terminated. 
-static int ghostwheel(regex_t *preg, char *string, long len, int nmatch, - regmatch_t pmatch[], int eflags) -{ - char *s = string; - - for (;;) { - long ll = 0; - int rc; - - while (len && !*s) { - s++; - len--; - } - while (s[ll] && ll<len) ll++; - - rc = regexec(preg, s, nmatch, pmatch, eflags); - if (!rc) { - for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) { - pmatch[rc].rm_so += s-string; - pmatch[rc].rm_eo += s-string; - } - - return 0; - } - if (ll==len) return rc; - - s += ll; - len -= ll; - } -} - -// Extend allocation to include new string, with newline between if newlen<0 - -static char *extend_string(char **old, char *new, int oldlen, int newlen) -{ - int newline = newlen < 0; - char *s; - - if (newline) newlen = -newlen; - s = *old = xrealloc(*old, oldlen+newlen+newline+1); - if (newline) s[oldlen++] = '\n'; - memcpy(s+oldlen, new, newlen); - s[oldlen+newlen] = 0; - - return s+oldlen+newlen+1; -} - -// An empty regex repeats the previous one -void *get_regex(void *trump, int offset) -{ - if (!offset) { - if (!TT.lastregex) error_exit("no previous regex"); - return TT.lastregex; - } - - return TT.lastregex = offset+(char *)trump; -} - -// Apply pattern to line from input file -static void walk_pattern(char **pline, long plen) -{ - struct append { - struct append *next, *prev; - int file; - char *str; - } *append = 0; - char *line = TT.nextline; - long len = TT.nextlen; - struct step *logrus; - int eol = 0, tea = 0; - - // Grab next line for deferred processing (EOF detection: we get a NULL - // pline at EOF to flush last line). Note that only end of _last_ input - // file matches $ (unless we're doing -i). - TT.nextline = 0; - TT.nextlen = 0; - if (pline) { - TT.nextline = *pline; - TT.nextlen = plen; - *pline = 0; - } - - if (!line || !len) return; - if (line[len-1] == '\n') line[--len] = eol++; - TT.count++; - - logrus = TT.restart ? 
TT.restart : (void *)TT.pattern; - TT.restart = 0; - - while (logrus) { - char *str, c = logrus->c; - - // Have we got a line or regex matching range for this rule? - if (*logrus->lmatch || *logrus->rmatch) { - int miss = 0; - long lm; - - // In a match that might end? - if (logrus->hit) { - if (!(lm = logrus->lmatch[1])) { - if (!logrus->rmatch[1]) logrus->hit = 0; - else { - void *rm = get_regex(logrus, logrus->rmatch[1]); - - // regex match end includes matching line, so defer deactivation - if (line && !ghostwheel(rm, line, len, 0, 0, 0)) miss = 1; - } - } else if (lm > 0 && lm < TT.count) logrus->hit = 0; - - // Start a new match? - } else { - if (!(lm = *logrus->lmatch)) { - void *rm = get_regex(logrus, *logrus->rmatch); - - if (line && !ghostwheel(rm, line, len, 0, 0, 0)) logrus->hit++; - } else if (lm == TT.count || (lm == -1 && !pline)) logrus->hit++; - - if (!logrus->lmatch[1] && !logrus->rmatch[1]) miss = 1; - } - - // Didn't match? - lm = !(logrus->hit ^ logrus->not); - - // Deferred disable from regex end match - if (miss || logrus->lmatch[1] == TT.count) logrus->hit = 0; - - if (lm) { - // Handle skipping curly bracket command group - if (c == '{') { - int curly = 1; - - while (curly) { - logrus = logrus->next; - if (logrus->c == '{') curly++; - if (logrus->c == '}') curly--; - } - } - logrus = logrus->next; - continue; - } - } - - // A deleted line can still update line match state for later commands - if (!line) { - logrus = logrus->next; - continue; - } - - // Process command - - if (c=='a' || c=='r') { - struct append *a = xzalloc(sizeof(struct append)); - a->str = logrus->arg1+(char *)logrus; - a->file = c== 'r'; - dlist_add_nomalloc((void *)&append, (void *)a); - } else if (c=='b' || c=='t' || c=='T') { - int t = tea; - - if (c != 'b') tea = 0; - if (c=='b' || t^(c=='T')) { - if (!logrus->arg1) break; - str = logrus->arg1+(char *)logrus; - for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next) - if (logrus->c == ':' && 
!strcmp(logrus->arg1+(char *)logrus, str)) - break; - if (!logrus) error_exit("no :%s", str); - } - } else if (c=='c') { - str = logrus->arg1+(char *)logrus; - if (!logrus->hit) emit(str, strlen(str), 1); - free(line); - line = 0; - continue; - } else if (c=='d') { - free(line); - line = 0; - continue; - } else if (c=='D') { - // Delete up to \n or end of buffer - str = line; - while ((str-line)<len) if (*(str++) == '\n') break; - len -= str - line; - memmove(line, str, len); - - // if "delete" blanks line, disable further processing - // otherwise trim and restart script - if (!len) { - free(line); - line = 0; - } else { - line[len] = 0; - logrus = (void *)TT.pattern; - } - continue; - } else if (c=='g') { - free(line); - line = xstrdup(TT.remember); - len = TT.rememberlen; - } else if (c=='G') { - line = xrealloc(line, len+TT.rememberlen+2); - line[len++] = '\n'; - memcpy(line+len, TT.remember, TT.rememberlen); - line[len += TT.rememberlen] = 0; - } else if (c=='h') { - free(TT.remember); - TT.remember = xstrdup(line); - TT.rememberlen = len; - } else if (c=='H') { - TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2); - TT.remember[TT.rememberlen++] = '\n'; - memcpy(TT.remember+TT.rememberlen, line, len); - TT.remember[TT.rememberlen += len] = 0; - } else if (c=='i') { - str = logrus->arg1+(char *)logrus; - emit(str, strlen(str), 1); - } else if (c=='l') { - int i, x, off; - - if (!TT.xx) { - terminal_size(&TT.xx, 0); - if (!TT.xx) TT.xx = 80; - if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10; - if (TT.xx > 4) TT.xx -= 4; - } - - for (i = off = 0; i<len; i++) { - if (off >= TT.xx) { - toybuf[off++] = '\\'; - emit(toybuf, off, 1); - off = 0; - } - x = stridx("\\\a\b\f\r\t\v", line[i]); - if (x != -1) { - toybuf[off++] = '\\'; - toybuf[off++] = "\\abfrtv"[x]; - } else if (line[i] >= ' ') toybuf[off++] = line[i]; - else off += sprintf(toybuf+off, "\\%03o", line[i]); - } - toybuf[off++] = '$'; - emit(toybuf, off, 1); - } else if (c=='n') { - 
TT.restart = logrus->next; - - break; - } else if (c=='N') { - // Can't just grab next line because we could have multiple N and - // we need to actually read ahead to get N;$p EOF detection right. - if (pline) { - TT.restart = logrus->next; - extend_string(&line, TT.nextline, len, -TT.nextlen); - free(TT.nextline); - TT.nextline = line; - TT.nextlen += len + 1; - line = 0; - } - - // Pending append goes out right after N - goto done; - } else if (c=='p' || c=='P') { - char *l = (c=='P') ? strchr(line, '\n') : 0; - - if (emit(line, l ? l-line : len, eol)) break; - } else if (c=='q') { - if (pline) *pline = (void *)1; - free(TT.nextline); - TT.nextline = 0; - TT.nextlen = 0; - - break; - } else if (c=='s') { - char *rline = line, *new = logrus->arg2 + (char *)logrus, *swap, *rswap; - regmatch_t *match = (void *)toybuf; - regex_t *reg = get_regex(logrus, logrus->arg1); - int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen; - - // Find match in remaining line (up to remaining len) - while (!ghostwheel(reg, rline, rlen, 10, match, mflags)) { - mflags = REG_NOTBOL; - - // Zero length matches don't count immediately after a previous match - mlen = match[0].rm_eo-match[0].rm_so; - if (!mlen && !zmatch) { - if (!rlen--) break; - rline++; - zmatch++; - continue; - } else zmatch = 0; - - // If we're replacing only a specific match, skip if this isn't it - off = logrus->sflags>>3; - if (off && off != ++count) { - rline += match[0].rm_eo; - rlen -= match[0].rm_eo; - - continue; - } - // The fact getline() can allocate unbounded amounts of memory is - // a bigger issue, but while we're here check for integer overflow - if (match[0].rm_eo > INT_MAX) perror_exit(0); - - // newlen = strlen(new) but with \1 and & and printf escapes - for (off = newlen = 0; new[off]; off++) { - int cc = -1; - - if (new[off] == '&') cc = 0; - else if (new[off] == '\\') cc = new[++off] - '0'; - if (cc < 0 || cc > 9) { - newlen++; - continue; - } - newlen += 
match[cc].rm_eo-match[cc].rm_so; - } - - // Allocate new size, copy start/end around match. (Can't extend in - // place because backrefs may refer to text after it's overwritten.) - len += newlen-mlen; - swap = xmalloc(len+1); - rswap = swap+(rline-line)+match[0].rm_so; - memcpy(swap, line, (rline-line)+match[0].rm_so); - memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1); - - // copy in new replacement text - for (off = mlen = 0; new[off]; off++) { - int cc = 0, ll; - - if (new[off] == '\\') { - cc = new[++off] - '0'; - if (cc<0 || cc>9) { - if (!(rswap[mlen++] = unescape(new[off]))) - rswap[mlen-1] = new[off]; - - continue; - } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc); - } else if (new[off] != '&') { - rswap[mlen++] = new[off]; - - continue; - } - - ll = match[cc].rm_eo-match[cc].rm_so; - memcpy(rswap+mlen, rline+match[cc].rm_so, ll); - mlen += ll; - } - - rline = rswap+newlen; - free(line); - line = swap; - - // Stop after first substitution unless we have flag g - if (!(logrus->sflags & 2)) break; - } - - if (mflags) { - // flag p - if (logrus->sflags & 4) emit(line, len, eol); - - tea = 1; - if (logrus->w) goto writenow; - } - } else if (c=='w') { - int fd, noeol; - char *name; - -writenow: - // Swap out emit() context - fd = TT.fdout; - noeol = TT.noeol; - - // We save filehandle and newline status before filename - name = logrus->w + (char *)logrus; - memcpy(&TT.fdout, name, 4); - name += 4; - TT.noeol = *(name++); - - // write, then save/restore context - if (emit(line, len, eol)) - perror_exit("w '%s'", logrus->arg1+(char *)logrus); - *(--name) = TT.noeol; - TT.noeol = noeol; - TT.fdout = fd; - } else if (c=='x') { - long swap = TT.rememberlen; - - str = TT.remember; - TT.remember = line; - line = str; - TT.rememberlen = len; - len = swap; - } else if (c=='y') { - char *from, *to = (char *)logrus; - int i, j; - - from = to+logrus->arg1; - to += logrus->arg2; - - for (i = 0; i < len; i++) { - j = stridx(from, 
line[i]); - if (j != -1) line[i] = to[j]; - } - } else if (c=='=') { - sprintf(toybuf, "%ld", TT.count); - emit(toybuf, strlen(toybuf), 1); - } - - logrus = logrus->next; - } - - if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol); - -done: - free(line); - - if (dlist_terminate(append)) while (append) { - struct append *a = append->next; - - if (append->file) { - int fd = xopen(append->str, O_RDONLY); - - // Force newline if noeol pending - emit(0, 0, 0); - xsendfile(fd, TT.fdout); - close(fd); - } else emit(append->str, strlen(append->str), 1); - free(append); - append = a; - } -} - -// Genericish function, can probably get moved to lib.c - -// Iterate over lines in file, calling function. Function can write 0 to -// the line pointer if they want to keep it, or 1 to terminate processing, -// otherwise line is freed. Passed file descriptor is closed at the end. -static void do_lines(int fd, char *name, void (*call)(char **pline, long len)) -{ - FILE *fp = fd ? xfdopen(fd, "r") : stdin; - - for (;;) { - char *line = 0; - ssize_t len; - - len = getline(&line, (void *)&len, fp); - if (len > 0) { - call(&line, len); - if (line == (void *)1) break; - free(line); - } else break; - } - - if (fd) fclose(fp); -} - -static void do_sed(int fd, char *name) -{ - int i = toys.optflags & FLAG_i; - char *tmp; - - if (i) { - struct step *primal; - - if (!fd && *name=='-') { - error_msg("no -i on stdin"); - return; - } - TT.fdout = copy_tempfile(fd, name, &tmp); - TT.count = 0; - for (primal = (void *)TT.pattern; primal; primal = primal->next) - primal->hit = 0; - } - do_lines(fd, name, walk_pattern); - if (i) { - walk_pattern(0, 0); - replace_tempfile(-1, TT.fdout, &tmp); - TT.fdout = 1; - TT.nextline = 0; - TT.nextlen = TT.noeol = 0; - } -} - -// Copy chunk of string between two delimiters, converting printf escapes. -// returns processed copy of string (0 if error), *pstr advances to next -// unused char. 
if delim (or *delim) is 0 uses/saves starting char as delimiter -// if regxex, ignore delimiter in [ranges] -static char *unescape_delimited_string(char **pstr, char *delim, int regex) -{ - char *to, *from, d; - - to = from = *pstr; - if (!delim || !*delim) { - if (!(d = *(from++))) return 0; - if (d == '\\') d = *(from++); - if (!d || d == '\\') return 0; - if (delim) *delim = d; - } else d = *delim; - to = delim = xmalloc(strlen(*pstr)+1); - - while (*from != d) { - if (!*from) return 0; - - // delimiter in regex character range doesn't count - if (*from == '[') { - int len = 1; - - if (from[len] == ']') len++; - while (from[len] != ']') if (!from[len++]) return 0; - memmove(to, from, ++len); - to += len; - from += len; - continue; - } - if (*from == '\\') { - if (!from[1]) return 0; - - // Check escaped end delimiter before printf style escapes. - if (from[1] == d) from++; - else if (from[1]=='\\') *(to++) = *(from++); - else { - char c = unescape(from[1]); - - if (c) { - *(to++) = c; - from+=2; - continue; - } - } - } - *(to++) = *(from++); - } - *to = 0; - *pstr = from+1; - - return delim; -} - -// Translate primal pattern into walkable form. -static void jewel_of_judgement(char **pline, long len) -{ - struct step *corwin = (void *)TT.pattern; - char *line = *pline, *reg, c, *errstart = *pline; - int i; - - // Append additional line to pattern argument string? 
- if (corwin && corwin->prev->hit) { - // Remove half-finished entry from list so remalloc() doesn't confuse it - TT.pattern = TT.pattern->prev; - corwin = dlist_pop(&TT.pattern); - corwin->hit = 0; - c = corwin->c; - reg = (char *)corwin; - reg += corwin->arg1 + strlen(reg + corwin->arg1); - - // Resume parsing - goto append; - } - - // Loop through commands in line - - corwin = 0; - for (;;) { - if (corwin) dlist_add_nomalloc(&TT.pattern, (void *)corwin); - - while (isspace(*line) || *line == ';') line++; - if (!*line || *line == '#') return; - - errstart = line; - memset(toybuf, 0, sizeof(struct step)); - corwin = (void *)toybuf; - reg = toybuf + sizeof(struct step); - - // Parse address range (if any) - for (i = 0; i < 2; i++) { - if (*line == ',') line++; - else if (i) break; - - if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0); - else if (*line == '$') { - corwin->lmatch[i] = -1; - line++; - } else if (*line == '/' || *line == '\\') { - char *s = line; - - if (!(s = unescape_delimited_string(&line, 0, 1))) goto brand; - if (!*s) corwin->rmatch[i] = 0; - else { - xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED); - corwin->rmatch[i] = reg-toybuf; - reg += sizeof(regex_t); - } - free(s); - } else break; - } - - while (isspace(*line)) line++; - if (!*line) break; - - while (*line == '!') { - corwin->not = 1; - line++; - } - while (isspace(*line)) line++; - - c = corwin->c = *(line++); - if (strchr("}:", c) && i) break; - if (strchr("aiqr=", c) && i>1) break; - - // Add step to pattern - corwin = xmalloc(reg-toybuf); - memcpy(corwin, toybuf, reg-toybuf); - reg = (reg-toybuf) + (char *)corwin; - - // Parse arguments by command type - if (c == '{') TT.nextlen++; - else if (c == '}') { - if (!TT.nextlen--) break; - } else if (c == 's') { - char *merlin, *fiona, delim = 0; - - // s/pattern/replacement/flags - - // get pattern (just record, we parse it later) - corwin->arg1 = reg - (char *)corwin; - if (!(merlin = 
unescape_delimited_string(&line, &delim, 1))) goto brand; - - // get replacement - don't replace escapes because \1 and \& need - // processing later, after we replace \\ with \ we can't tell \\1 from \1 - fiona = line; - while (*line != delim) { - if (!*line) goto brand; - if (*line == '\\') { - if (!line[1]) goto brand; - line += 2; - } else line++; - } - - corwin->arg2 = corwin->arg1 + sizeof(regex_t); - reg = extend_string((void *)&corwin, fiona, corwin->arg2, line-fiona)+1; - - // get flags - for (line++; *line; line++) { - long l; - - if (isspace(*line) && *line != '\n') continue; - - if (0 <= (l = stridx("igp", *line))) corwin->sflags |= 1<<l; - else if (!(corwin->sflags>>3) && 0<(l = strtol(line, &line, 10))) { - corwin->sflags |= l << 3; - line--; - } else break; - } - - // We deferred actually parsing the regex until we had the s///i flag - // allocating the space was done by extend_string() above - if (!*merlin) corwin->arg1 = 0; - else xregcomp((void *)(corwin->arg1 + (char *)corwin), merlin, - ((toys.optflags & FLAG_r)*REG_EXTENDED)|((corwin->sflags&1)*REG_ICASE)); - free(merlin); - if (*line == 'w') { - line++; - goto writenow; - } - } else if (c == 'w') { - int fd, delim; - char *cc; - - // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and - // eol status, and to retain the filename for error messages, we'd need - // to go up to arg5 just for this. Compromise: dynamically allocate the - // filehandle and eol status. 
- -writenow: - while (isspace(*line)) line++; - if (!*line) goto brand; - for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break; - delim = *cc; - *cc = 0; - fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644); - *cc = delim; - - corwin->w = reg - (char *)corwin; - corwin = xrealloc(corwin, corwin->w+(cc-line)+6); - reg = corwin->w + (char *)corwin; - - memcpy(reg, &fd, 4); - reg += 4; - *(reg++) = 0; - memcpy(reg, line, delim); - reg += delim; - *(reg++) = 0; - - line = cc; - if (delim) line += 2; - } else if (c == 'y') { - char *s, delim = 0; - int len; - - if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand; - corwin->arg1 = reg-(char *)corwin; - len = strlen(s); - reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len); - free(s); - corwin->arg2 = reg-(char *)corwin; - if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand; - if (len != strlen(s)) goto brand; - reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len); - free(s); - } else if (strchr("abcirtTw:", c)) { - int end, class; - - // Trim whitespace from "b ;" and ": blah " but only first space in "w x " - - while (isspace(*line)) { - if (!strchr("btT", c) || *line != '\n') line++; - else break; - } -append: - class = !strchr("btT:", c); - end = strcspn(line, class ? "\n" : "; \t\r\n\v\f"); - - if (!end) { - if (!strchr("btT", c)) break; - continue; - } - - // Extend allocation to include new string. We use offsets instead of - // pointers so realloc() moving stuff doesn't break things. Do it - // here instead of toybuf so there's no maximum size. - if (!corwin->arg1) corwin->arg1 = reg - (char*)corwin; - reg = extend_string((void *)&corwin, line, reg - (char *)corwin, end); - line += end; - - // Line continuation? (Two slightly different input methods, -e with - // embedded newline vs -f line by line. Must parse both correctly.) 
- if (class && line[-1] == '\\') { - reg[-2] = 0; - if (*line && line[1]) { - reg -= 2; - line++; - goto append; - } else corwin->hit++; - } - - // Commands that take no arguments - } else if (!strchr("{dDgGhHlnNpPqx=", c)) break; - } - -brand: - // Reminisce about chestnut trees. - error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1, *line); -} - -void sed_main(void) -{ - struct arg_list *dworkin; - char **args = toys.optargs; - - // Lie to autoconf when it asks stupid questions, so configure regexes - // that look for "GNU sed version %f" greater than some old buggy number - // don't fail us for not matching their narrow expectations. - if (toys.optflags & FLAG_version) { - xprintf("This is not GNU sed version 9.0\n"); - return; - } - - // Need a pattern. If no unicorns about, fight serpent and take its eye. - if (!TT.e && !TT.f) { - if (!*toys.optargs) error_exit("no pattern"); - (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++); - } - - // Option parsing infrastructure can't interlace "-e blah -f blah -e blah" - // so handle all -e, then all -f. (At least the behavior's consistent.) - - for (dworkin = TT.e; dworkin; dworkin = dworkin->next) - jewel_of_judgement(&dworkin->arg, strlen(dworkin->arg)); - for (dworkin = TT.f; dworkin; dworkin = dworkin->next) - do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement); - dlist_terminate(TT.pattern); - if (TT.nextlen) error_exit("no }"); - - TT.fdout = 1; - TT.remember = xstrdup(""); - - // Inflict pattern upon input files - loopfiles_rw(args, O_RDONLY, 0, 0, do_sed); - - if (!(toys.optflags & FLAG_i)) walk_pattern(0, 0); - - // todo: need to close fd when done for TOYBOX_FREE? -} |