aboutsummaryrefslogtreecommitdiff
path: root/toys/pending/sed.c
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2014-12-21 01:54:54 -0600
committerRob Landley <rob@landley.net>2014-12-21 01:54:54 -0600
commit1a1e0a9d325dd1ca7461a8e8203dd47bab6a8bc1 (patch)
tree6fc7da5f3bbd3ad9ead0fd52c2dc4d67d5a825cc /toys/pending/sed.c
parent32cd2b770fe3ac8c419a29156162f3a037cf47a3 (diff)
downloadtoybox-1a1e0a9d325dd1ca7461a8e8203dd47bab6a8bc1.tar.gz
Promote sed to posix.
Diffstat (limited to 'toys/pending/sed.c')
-rw-r--r--toys/pending/sed.c1002
1 files changed, 0 insertions, 1002 deletions
diff --git a/toys/pending/sed.c b/toys/pending/sed.c
deleted file mode 100644
index a5c24454..00000000
--- a/toys/pending/sed.c
+++ /dev/null
@@ -1,1002 +0,0 @@
-/* sed.c - stream editor. Thing that does s/// and other stuff.
- *
- * Copyright 2014 Rob Landley <rob@landley.net>
- *
- * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
- *
- * TODO: lines > 2G could signed int wrap length counters. Not just getline()
- * but N and s///
-
-USE_SED(NEWTOY(sed, "(version)e*f*inr", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE))
-
-config SED
- bool "sed"
- default n
- help
- usage: sed [-inr] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...]
-
- Stream editor. Apply one or more editing SCRIPTs to each line of input
- (from FILE or stdin) producing output (by default to stdout).
-
- -e add SCRIPT to list
- -f add contents of SCRIPT_FILE to list
- -i Edit each file in place.
- -n No default output. (Use the p command to output matched lines.)
- -r Use extended regular expression syntax.
- -s Treat input files separately (implied by -i)
-
- A SCRIPT is a series of one or more COMMANDs separated by newlines or
- semicolons. All -e SCRIPTs are concatenated together as if separated
- by newlines, followed by all lines from -f SCRIPT_FILEs, in order.
- If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT.
-
- Each COMMAND may be preceded by an address which limits the command to
- apply only to the specified line(s). Commands without an address apply to
- every line. Addresses are of the form:
-
- [ADDRESS[,ADDRESS]]COMMAND
-
- The ADDRESS may be a decimal line number (starting at 1), a /regular
- expression/ within a pair of forward slashes, or the character "$" which
- matches the last line of input. (In -s or -i mode this matches the last
- line of each file, otherwise just the last line of the last file.) A single
- address matches one line, a pair of comma separated addresses match
- everything from the first address to the second address (inclusive). If
- both addresses are regular expressions, more than one range of lines in
- each file can match.
-
- REGULAR EXPRESSIONS in sed are started and ended by the same character
- (traditionally / but anything except a backslash or a newline works).
- Backslashes may be used to escape the delimiter if it occurs in the
- regex, and for the usual printf escapes (\abcefnrtv and octal, hex,
- and unicode). An empty regex repeats the previous one. ADDRESS regexes
- (above) require the first delimiter to be escaped with a backslash when
- it isn't a forward slash (to distinguish it from the COMMANDs below).
-
- Sed mostly operates on individual lines one at a time. It reads each line,
- processes it, and either writes it to the output or discards it before
- reading the next line. Sed can remember one additional line in a separate
- buffer (using the h, H, g, G, and x commands), and can read the next line
- of input early (using the n and N command), but other than that command
- scripts operate on individual lines of text.
-
- Each COMMAND starts with a single character. The following commands take
- no arguments:
-
- { Start a new command block, continuing until a corresponding "}".
- Command blocks may nest. If the block has an address, commands within
- the block are only run for lines within the block's address range.
-
- } End command block (this command cannot have an address)
-
- d Delete this line and move on to the next one
- (ignores remaining COMMANDs)
-
- D Delete one line of input and restart command SCRIPT (same as "d"
- unless you've glued lines together with "N" or similar)
-
- g Get remembered line (overwriting current line)
-
- G Get remembered line (appending to current line)
-
- h Remember this line (overwriting remembered line)
-
- H Remember this line (appending to remembered line, if any)
-
- l Print line, escaping \abfrtv (but not newline), octal escaping other
- nonprintable characters, wrapping lines to terminal width with a
- backslash, and appending $ to actual end of line.
-
- n Print default output and read next line, replacing current line
- (If no next line available, quit processing script)
-
- N Append next line of input to this line, separated by a newline
- (This advances the line counter for address matching and "=", if no
- next line available quit processing script without default output)
-
- p Print this line
-
- P Print this line up to first newline (from "N")
-
- q Quit (print default output, no more commands processed or lines read)
-
- x Exchange this line with remembered line (overwrite in both directions)
-
- = Print the current line number (followed by a newline)
-
- The following commands (may) take an argument. The "text" arguments (to
- the "a", "b", and "c" commands) may end with an unescaped "\" to append
- the next line (for which leading whitespace is not skipped), and also
- treat ";" as a literal character (use "\;" instead).
-
- a [text] Append text to output before attempting to read next line
-
- b [label] Branch, jumps to :label (or with no label, to end of SCRIPT)
-
- c [text] Delete line, output text at end of matching address range
- (ignores remaining COMMANDs)
-
- i [text] Print text
-
- r [file] Append contents of file to output before attempting to read
- next line.
-
- s/S/R/F Search for regex S, replace matched text with R using flags F.
- The first character after the "s" (anything but newline or
- backslash) is the delimiter, escape with \ to use normally.
-
- The replacement text may contain "&" to substitute the matched
- text (escape it with backslash for a literal &), or \1 through
- \9 to substitute a parenthetical subexpression in the regex.
- You can also use the normal backslash escapes such as \n and
- a backslash at the end of the line appends the next line.
-
- The flags are:
-
- [0-9] A number, substitute only that occurrence of pattern
- g Global, substitute all occurrences of pattern
- i Ignore case when matching
- p Print the line if match was found and replaced
- w [file] Write (append) line to file if match replaced
-
- t [label] Test, jump to :label only if an "s" command found a match in
- this line since last test (replacing with same text counts)
-
- T [label] Test false, jump only if "s" hasn't found a match.
-
- w [file] Write (append) line to file
-
- y/old/new/ Change each character in 'old' to corresponding character
- in 'new' (with standard backslash escapes, delimiter can be
- any repeated character except \ or \n)
-
- : [label] Labeled target for jump commands
-
- # Comment, ignore rest of this line of SCRIPT
-
- Deviations from posix: allow extended regular expressions with -r,
- editing in place with -i, separate with -s, printf escapes in text, line
- continuations, semicolons after all commands, 2-address anywhere an
- address is allowed, "T" command, multiline continuations for [abc],
- \; to end [abc] argument before end of line.
-*/
-
-#define FOR_sed
-#include "toys.h"
-
-GLOBALS(
- struct arg_list *f;
- struct arg_list *e;
-
- // processed pattern list
- struct double_list *pattern;
-
- char *nextline, *remember;
- void *restart, *lastregex;
- long nextlen, rememberlen, count;
- int fdout, noeol;
- unsigned xx;
-)
-
// One parsed sed command. Argument data (compiled regexes, text, filenames)
// is stored in the same allocation right after the struct and referenced by
// byte offset from the start of the struct, so realloc() moving the
// allocation does not invalidate anything.
struct step {
  struct step *next;
  struct step *prev;

  // Address range: [0] is the start, [1] the end
  long lmatch[2];     // line numbers (-1 means "$", 0 means unused)
  int rmatch[2];      // offsets of compiled address regexes (0 means unused)
  int arg1;           // offset of first argument
  int arg2;           // offset of second argument
  int w;              // offset of write record: 4 byte fd, noeol byte, filename
  unsigned not;       // address was followed by '!'
  unsigned hit;       // inside a matching range (parser: continuation pending)
  unsigned sflags;    // s/// flags: 1=i 2=g 4=p, occurrence number in bits 3+
  char c;             // command character
};
-
-// Write out line with potential embedded NUL, handling eol/noeol
-static int emit(char *line, long len, int eol)
-{
- int l, old = line[len];
-
- if (TT.noeol && !writeall(TT.fdout, "\n", 1)) return 1;
- if (eol) line[len++] = '\n';
- if (!len) return 0;
- TT.noeol = len && !eol;
- l = writeall(TT.fdout, line, len);
- if (eol) line[len-1] = old;
- if (l != len) {
- perror_msg("short write");
-
- return 1;
- }
-
- return 0;
-}
-
// regexec() wrapper for strings containing embedded NUL bytes.
//
// string holds len bytes and must be followed by a NUL terminator. It is
// treated as a series of NUL separated segments, and each segment is handed
// to regexec() in turn until one matches. Neither the pattern nor a match can
// span a NUL byte (even with wildcards).
//
// Returns 0 on match, with every participating pmatch[] entry converted to
// offsets from the start of string, otherwise the regexec() error code from
// the last segment tried.
static int ghostwheel(regex_t *preg, char *string, long len, int nmatch,
  regmatch_t pmatch[], int eflags)
{
  char *s = string;

  for (;;) {
    long ll = 0;
    int rc, i;

    // Skip NUL bytes to reach the start of the next segment
    while (len && !*s) {
      s++;
      len--;
    }

    // Measure the segment, checking the bound before touching the byte
    while (ll<len && s[ll]) ll++;

    rc = regexec(preg, s, nmatch, pmatch, eflags);
    if (!rc) {
      // Subexpressions that took no part in the match are reported as -1
      // and can sit between ones that did, as in "(a)|(b)" matching "b".
      // Skip them instead of stopping, so later groups get adjusted too.
      for (i = 0; i<nmatch; i++) {
        if (pmatch[i].rm_so == -1) continue;
        pmatch[i].rm_so += s-string;
        pmatch[i].rm_eo += s-string;
      }

      return 0;
    }

    // That was the last segment
    if (ll==len) return rc;

    s += ll;
    len -= ll;
  }
}
-
// Grow the allocation at *old (currently holding oldlen bytes) and append
// newlen bytes of new plus a NUL terminator. A negative newlen means append
// -newlen bytes with a newline inserted before them. *old is updated because
// the allocation may move. Returns a pointer just past the NUL terminator.
static char *extend_string(char **old, char *new, int oldlen, int newlen)
{
  int separator = 0;
  char *buf, *tail;

  if (newlen < 0) {
    separator = 1;
    newlen = -newlen;
  }

  buf = xrealloc(*old, oldlen+newlen+separator+1);
  *old = buf;

  tail = buf+oldlen;
  if (separator) *(tail++) = '\n';
  memcpy(tail, new, newlen);
  tail[newlen] = 0;

  return tail+newlen+1;
}
-
-// An empty regex repeats the previous one
-void *get_regex(void *trump, int offset)
-{
- if (!offset) {
- if (!TT.lastregex) error_exit("no previous regex");
- return TT.lastregex;
- }
-
- return TT.lastregex = offset+(char *)trump;
-}
-
-// Apply pattern to line from input file
-static void walk_pattern(char **pline, long plen)
-{
- struct append {
- struct append *next, *prev;
- int file;
- char *str;
- } *append = 0;
- char *line = TT.nextline;
- long len = TT.nextlen;
- struct step *logrus;
- int eol = 0, tea = 0;
-
- // Grab next line for deferred processing (EOF detection: we get a NULL
- // pline at EOF to flush last line). Note that only end of _last_ input
- // file matches $ (unless we're doing -i).
- TT.nextline = 0;
- TT.nextlen = 0;
- if (pline) {
- TT.nextline = *pline;
- TT.nextlen = plen;
- *pline = 0;
- }
-
- if (!line || !len) return;
- if (line[len-1] == '\n') line[--len] = eol++;
- TT.count++;
-
- logrus = TT.restart ? TT.restart : (void *)TT.pattern;
- TT.restart = 0;
-
- while (logrus) {
- char *str, c = logrus->c;
-
- // Have we got a line or regex matching range for this rule?
- if (*logrus->lmatch || *logrus->rmatch) {
- int miss = 0;
- long lm;
-
- // In a match that might end?
- if (logrus->hit) {
- if (!(lm = logrus->lmatch[1])) {
- if (!logrus->rmatch[1]) logrus->hit = 0;
- else {
- void *rm = get_regex(logrus, logrus->rmatch[1]);
-
- // regex match end includes matching line, so defer deactivation
- if (line && !ghostwheel(rm, line, len, 0, 0, 0)) miss = 1;
- }
- } else if (lm > 0 && lm < TT.count) logrus->hit = 0;
-
- // Start a new match?
- } else {
- if (!(lm = *logrus->lmatch)) {
- void *rm = get_regex(logrus, *logrus->rmatch);
-
- if (line && !ghostwheel(rm, line, len, 0, 0, 0)) logrus->hit++;
- } else if (lm == TT.count || (lm == -1 && !pline)) logrus->hit++;
-
- if (!logrus->lmatch[1] && !logrus->rmatch[1]) miss = 1;
- }
-
- // Didn't match?
- lm = !(logrus->hit ^ logrus->not);
-
- // Deferred disable from regex end match
- if (miss || logrus->lmatch[1] == TT.count) logrus->hit = 0;
-
- if (lm) {
- // Handle skipping curly bracket command group
- if (c == '{') {
- int curly = 1;
-
- while (curly) {
- logrus = logrus->next;
- if (logrus->c == '{') curly++;
- if (logrus->c == '}') curly--;
- }
- }
- logrus = logrus->next;
- continue;
- }
- }
-
- // A deleted line can still update line match state for later commands
- if (!line) {
- logrus = logrus->next;
- continue;
- }
-
- // Process command
-
- if (c=='a' || c=='r') {
- struct append *a = xzalloc(sizeof(struct append));
- a->str = logrus->arg1+(char *)logrus;
- a->file = c== 'r';
- dlist_add_nomalloc((void *)&append, (void *)a);
- } else if (c=='b' || c=='t' || c=='T') {
- int t = tea;
-
- if (c != 'b') tea = 0;
- if (c=='b' || t^(c=='T')) {
- if (!logrus->arg1) break;
- str = logrus->arg1+(char *)logrus;
- for (logrus = (void *)TT.pattern; logrus; logrus = logrus->next)
- if (logrus->c == ':' && !strcmp(logrus->arg1+(char *)logrus, str))
- break;
- if (!logrus) error_exit("no :%s", str);
- }
- } else if (c=='c') {
- str = logrus->arg1+(char *)logrus;
- if (!logrus->hit) emit(str, strlen(str), 1);
- free(line);
- line = 0;
- continue;
- } else if (c=='d') {
- free(line);
- line = 0;
- continue;
- } else if (c=='D') {
- // Delete up to \n or end of buffer
- str = line;
- while ((str-line)<len) if (*(str++) == '\n') break;
- len -= str - line;
- memmove(line, str, len);
-
- // if "delete" blanks line, disable further processing
- // otherwise trim and restart script
- if (!len) {
- free(line);
- line = 0;
- } else {
- line[len] = 0;
- logrus = (void *)TT.pattern;
- }
- continue;
- } else if (c=='g') {
- free(line);
- line = xstrdup(TT.remember);
- len = TT.rememberlen;
- } else if (c=='G') {
- line = xrealloc(line, len+TT.rememberlen+2);
- line[len++] = '\n';
- memcpy(line+len, TT.remember, TT.rememberlen);
- line[len += TT.rememberlen] = 0;
- } else if (c=='h') {
- free(TT.remember);
- TT.remember = xstrdup(line);
- TT.rememberlen = len;
- } else if (c=='H') {
- TT.remember = xrealloc(TT.remember, TT.rememberlen+len+2);
- TT.remember[TT.rememberlen++] = '\n';
- memcpy(TT.remember+TT.rememberlen, line, len);
- TT.remember[TT.rememberlen += len] = 0;
- } else if (c=='i') {
- str = logrus->arg1+(char *)logrus;
- emit(str, strlen(str), 1);
- } else if (c=='l') {
- int i, x, off;
-
- if (!TT.xx) {
- terminal_size(&TT.xx, 0);
- if (!TT.xx) TT.xx = 80;
- if (TT.xx > sizeof(toybuf)-10) TT.xx = sizeof(toybuf)-10;
- if (TT.xx > 4) TT.xx -= 4;
- }
-
- for (i = off = 0; i<len; i++) {
- if (off >= TT.xx) {
- toybuf[off++] = '\\';
- emit(toybuf, off, 1);
- off = 0;
- }
- x = stridx("\\\a\b\f\r\t\v", line[i]);
- if (x != -1) {
- toybuf[off++] = '\\';
- toybuf[off++] = "\\abfrtv"[x];
- } else if (line[i] >= ' ') toybuf[off++] = line[i];
- else off += sprintf(toybuf+off, "\\%03o", line[i]);
- }
- toybuf[off++] = '$';
- emit(toybuf, off, 1);
- } else if (c=='n') {
- TT.restart = logrus->next;
-
- break;
- } else if (c=='N') {
- // Can't just grab next line because we could have multiple N and
- // we need to actually read ahead to get N;$p EOF detection right.
- if (pline) {
- TT.restart = logrus->next;
- extend_string(&line, TT.nextline, len, -TT.nextlen);
- free(TT.nextline);
- TT.nextline = line;
- TT.nextlen += len + 1;
- line = 0;
- }
-
- // Pending append goes out right after N
- goto done;
- } else if (c=='p' || c=='P') {
- char *l = (c=='P') ? strchr(line, '\n') : 0;
-
- if (emit(line, l ? l-line : len, eol)) break;
- } else if (c=='q') {
- if (pline) *pline = (void *)1;
- free(TT.nextline);
- TT.nextline = 0;
- TT.nextlen = 0;
-
- break;
- } else if (c=='s') {
- char *rline = line, *new = logrus->arg2 + (char *)logrus, *swap, *rswap;
- regmatch_t *match = (void *)toybuf;
- regex_t *reg = get_regex(logrus, logrus->arg1);
- int mflags = 0, count = 0, zmatch = 1, rlen = len, mlen, off, newlen;
-
- // Find match in remaining line (up to remaining len)
- while (!ghostwheel(reg, rline, rlen, 10, match, mflags)) {
- mflags = REG_NOTBOL;
-
- // Zero length matches don't count immediately after a previous match
- mlen = match[0].rm_eo-match[0].rm_so;
- if (!mlen && !zmatch) {
- if (!rlen--) break;
- rline++;
- zmatch++;
- continue;
- } else zmatch = 0;
-
- // If we're replacing only a specific match, skip if this isn't it
- off = logrus->sflags>>3;
- if (off && off != ++count) {
- rline += match[0].rm_eo;
- rlen -= match[0].rm_eo;
-
- continue;
- }
- // The fact getline() can allocate unbounded amounts of memory is
- // a bigger issue, but while we're here check for integer overflow
- if (match[0].rm_eo > INT_MAX) perror_exit(0);
-
- // newlen = strlen(new) but with \1 and & and printf escapes
- for (off = newlen = 0; new[off]; off++) {
- int cc = -1;
-
- if (new[off] == '&') cc = 0;
- else if (new[off] == '\\') cc = new[++off] - '0';
- if (cc < 0 || cc > 9) {
- newlen++;
- continue;
- }
- newlen += match[cc].rm_eo-match[cc].rm_so;
- }
-
- // Allocate new size, copy start/end around match. (Can't extend in
- // place because backrefs may refer to text after it's overwritten.)
- len += newlen-mlen;
- swap = xmalloc(len+1);
- rswap = swap+(rline-line)+match[0].rm_so;
- memcpy(swap, line, (rline-line)+match[0].rm_so);
- memcpy(rswap+newlen, rline+match[0].rm_eo, (rlen -= match[0].rm_eo)+1);
-
- // copy in new replacement text
- for (off = mlen = 0; new[off]; off++) {
- int cc = 0, ll;
-
- if (new[off] == '\\') {
- cc = new[++off] - '0';
- if (cc<0 || cc>9) {
- if (!(rswap[mlen++] = unescape(new[off])))
- rswap[mlen-1] = new[off];
-
- continue;
- } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc);
- } else if (new[off] != '&') {
- rswap[mlen++] = new[off];
-
- continue;
- }
-
- ll = match[cc].rm_eo-match[cc].rm_so;
- memcpy(rswap+mlen, rline+match[cc].rm_so, ll);
- mlen += ll;
- }
-
- rline = rswap+newlen;
- free(line);
- line = swap;
-
- // Stop after first substitution unless we have flag g
- if (!(logrus->sflags & 2)) break;
- }
-
- if (mflags) {
- // flag p
- if (logrus->sflags & 4) emit(line, len, eol);
-
- tea = 1;
- if (logrus->w) goto writenow;
- }
- } else if (c=='w') {
- int fd, noeol;
- char *name;
-
-writenow:
- // Swap out emit() context
- fd = TT.fdout;
- noeol = TT.noeol;
-
- // We save filehandle and newline status before filename
- name = logrus->w + (char *)logrus;
- memcpy(&TT.fdout, name, 4);
- name += 4;
- TT.noeol = *(name++);
-
- // write, then save/restore context
- if (emit(line, len, eol))
- perror_exit("w '%s'", logrus->arg1+(char *)logrus);
- *(--name) = TT.noeol;
- TT.noeol = noeol;
- TT.fdout = fd;
- } else if (c=='x') {
- long swap = TT.rememberlen;
-
- str = TT.remember;
- TT.remember = line;
- line = str;
- TT.rememberlen = len;
- len = swap;
- } else if (c=='y') {
- char *from, *to = (char *)logrus;
- int i, j;
-
- from = to+logrus->arg1;
- to += logrus->arg2;
-
- for (i = 0; i < len; i++) {
- j = stridx(from, line[i]);
- if (j != -1) line[i] = to[j];
- }
- } else if (c=='=') {
- sprintf(toybuf, "%ld", TT.count);
- emit(toybuf, strlen(toybuf), 1);
- }
-
- logrus = logrus->next;
- }
-
- if (line && !(toys.optflags & FLAG_n)) emit(line, len, eol);
-
-done:
- free(line);
-
- if (dlist_terminate(append)) while (append) {
- struct append *a = append->next;
-
- if (append->file) {
- int fd = xopen(append->str, O_RDONLY);
-
- // Force newline if noeol pending
- emit(0, 0, 0);
- xsendfile(fd, TT.fdout);
- close(fd);
- } else emit(append->str, strlen(append->str), 1);
- free(append);
- append = a;
- }
-}
-
// Genericish function, can probably get moved to lib.c

// Iterate over lines in file, calling function. Function can write 0 to
// the line pointer if they want to keep it, or 1 to terminate processing,
// otherwise line is freed. Passed file descriptor is closed at the end.
// (fd 0 reads from stdin, which is left open. name is currently unused.)
static void do_lines(int fd, char *name, void (*call)(char **pline, long len))
{
  FILE *fp = fd ? xfdopen(fd, "r") : stdin;

  for (;;) {
    char *line = 0;
    size_t size = 0;
    ssize_t len = getline(&line, &size, fp);

    // getline() can allocate a buffer even when it fails, so free it
    if (len <= 0) {
      free(line);
      break;
    }

    call(&line, len);
    // (void *)1 is a "stop reading" marker, not an allocation to free
    if (line == (void *)1) break;
    free(line);
  }

  if (fd) fclose(fp);
}
-
-static void do_sed(int fd, char *name)
-{
- int i = toys.optflags & FLAG_i;
- char *tmp;
-
- if (i) {
- struct step *primal;
-
- if (!fd && *name=='-') {
- error_msg("no -i on stdin");
- return;
- }
- TT.fdout = copy_tempfile(fd, name, &tmp);
- TT.count = 0;
- for (primal = (void *)TT.pattern; primal; primal = primal->next)
- primal->hit = 0;
- }
- do_lines(fd, name, walk_pattern);
- if (i) {
- walk_pattern(0, 0);
- replace_tempfile(-1, TT.fdout, &tmp);
- TT.fdout = 1;
- TT.nextline = 0;
- TT.nextlen = TT.noeol = 0;
- }
-}
-
// Copy chunk of string between two delimiters, converting printf escapes.
// Returns a newly allocated processed copy of the string (0 if error), and
// *pstr advances to the next unused char (just past the closing delimiter).
// If delim (or *delim) is 0, the starting char is used as the delimiter (a
// leading backslash is skipped first) and saved to *delim when delim isn't
// NULL. Otherwise *delim is the closing delimiter and the text starts at
// *pstr. If regex, ignore delimiter in [ranges].
static char *unescape_delimited_string(char **pstr, char *delim, int regex)
{
  char *to, *from, *out, d;

  from = *pstr;
  if (!delim || !*delim) {
    if (!(d = *(from++))) return 0;
    if (d == '\\') d = *(from++);
    if (!d || d == '\\') return 0;
    if (delim) *delim = d;
  } else d = *delim;

  // The output is never longer than the input
  to = out = xmalloc(strlen(*pstr)+1);

  while (*from != d) {
    if (!*from) goto fail;

    // delimiter in regex character range doesn't count. (Only for regexes:
    // in y/// text a '[' is just an ordinary character.)
    if (regex && *from == '[') {
      int len = 1;

      // A ']' right after the opening '[' or "[^" is a literal member
      if (from[len] == '^') len++;
      if (from[len] == ']') len++;
      while (from[len] != ']') if (!from[len++]) goto fail;
      memmove(to, from, ++len);
      to += len;
      from += len;
      continue;
    }
    if (*from == '\\') {
      if (!from[1]) goto fail;

      // Check escaped end delimiter before printf style escapes.
      if (from[1] == d) from++;
      else if (from[1]=='\\') *(to++) = *(from++);
      else {
        char c = unescape(from[1]);

        if (c) {
          *(to++) = c;
          from+=2;
          continue;
        }
      }
    }
    *(to++) = *(from++);
  }
  *to = 0;
  *pstr = from+1;

  return out;

fail:
  free(out);

  return 0;
}
-
-// Translate primal pattern into walkable form.
-static void jewel_of_judgement(char **pline, long len)
-{
- struct step *corwin = (void *)TT.pattern;
- char *line = *pline, *reg, c, *errstart = *pline;
- int i;
-
- // Append additional line to pattern argument string?
- if (corwin && corwin->prev->hit) {
- // Remove half-finished entry from list so remalloc() doesn't confuse it
- TT.pattern = TT.pattern->prev;
- corwin = dlist_pop(&TT.pattern);
- corwin->hit = 0;
- c = corwin->c;
- reg = (char *)corwin;
- reg += corwin->arg1 + strlen(reg + corwin->arg1);
-
- // Resume parsing
- goto append;
- }
-
- // Loop through commands in line
-
- corwin = 0;
- for (;;) {
- if (corwin) dlist_add_nomalloc(&TT.pattern, (void *)corwin);
-
- while (isspace(*line) || *line == ';') line++;
- if (!*line || *line == '#') return;
-
- errstart = line;
- memset(toybuf, 0, sizeof(struct step));
- corwin = (void *)toybuf;
- reg = toybuf + sizeof(struct step);
-
- // Parse address range (if any)
- for (i = 0; i < 2; i++) {
- if (*line == ',') line++;
- else if (i) break;
-
- if (isdigit(*line)) corwin->lmatch[i] = strtol(line, &line, 0);
- else if (*line == '$') {
- corwin->lmatch[i] = -1;
- line++;
- } else if (*line == '/' || *line == '\\') {
- char *s = line;
-
- if (!(s = unescape_delimited_string(&line, 0, 1))) goto brand;
- if (!*s) corwin->rmatch[i] = 0;
- else {
- xregcomp((void *)reg, s, (toys.optflags & FLAG_r)*REG_EXTENDED);
- corwin->rmatch[i] = reg-toybuf;
- reg += sizeof(regex_t);
- }
- free(s);
- } else break;
- }
-
- while (isspace(*line)) line++;
- if (!*line) break;
-
- while (*line == '!') {
- corwin->not = 1;
- line++;
- }
- while (isspace(*line)) line++;
-
- c = corwin->c = *(line++);
- if (strchr("}:", c) && i) break;
- if (strchr("aiqr=", c) && i>1) break;
-
- // Add step to pattern
- corwin = xmalloc(reg-toybuf);
- memcpy(corwin, toybuf, reg-toybuf);
- reg = (reg-toybuf) + (char *)corwin;
-
- // Parse arguments by command type
- if (c == '{') TT.nextlen++;
- else if (c == '}') {
- if (!TT.nextlen--) break;
- } else if (c == 's') {
- char *merlin, *fiona, delim = 0;
-
- // s/pattern/replacement/flags
-
- // get pattern (just record, we parse it later)
- corwin->arg1 = reg - (char *)corwin;
- if (!(merlin = unescape_delimited_string(&line, &delim, 1))) goto brand;
-
- // get replacement - don't replace escapes because \1 and \& need
- // processing later, after we replace \\ with \ we can't tell \\1 from \1
- fiona = line;
- while (*line != delim) {
- if (!*line) goto brand;
- if (*line == '\\') {
- if (!line[1]) goto brand;
- line += 2;
- } else line++;
- }
-
- corwin->arg2 = corwin->arg1 + sizeof(regex_t);
- reg = extend_string((void *)&corwin, fiona, corwin->arg2, line-fiona)+1;
-
- // get flags
- for (line++; *line; line++) {
- long l;
-
- if (isspace(*line) && *line != '\n') continue;
-
- if (0 <= (l = stridx("igp", *line))) corwin->sflags |= 1<<l;
- else if (!(corwin->sflags>>3) && 0<(l = strtol(line, &line, 10))) {
- corwin->sflags |= l << 3;
- line--;
- } else break;
- }
-
- // We deferred actually parsing the regex until we had the s///i flag
- // allocating the space was done by extend_string() above
- if (!*merlin) corwin->arg1 = 0;
- else xregcomp((void *)(corwin->arg1 + (char *)corwin), merlin,
- ((toys.optflags & FLAG_r)*REG_EXTENDED)|((corwin->sflags&1)*REG_ICASE));
- free(merlin);
- if (*line == 'w') {
- line++;
- goto writenow;
- }
- } else if (c == 'w') {
- int fd, delim;
- char *cc;
-
- // Since s/// uses arg1 and arg2, and w needs a persistent filehandle and
- // eol status, and to retain the filename for error messages, we'd need
- // to go up to arg5 just for this. Compromise: dynamically allocate the
- // filehandle and eol status.
-
-writenow:
- while (isspace(*line)) line++;
- if (!*line) goto brand;
- for (cc = line; *cc; cc++) if (*cc == '\\' && cc[1] == ';') break;
- delim = *cc;
- *cc = 0;
- fd = xcreate(line, O_WRONLY|O_CREAT|O_TRUNC, 0644);
- *cc = delim;
-
- corwin->w = reg - (char *)corwin;
- corwin = xrealloc(corwin, corwin->w+(cc-line)+6);
- reg = corwin->w + (char *)corwin;
-
- memcpy(reg, &fd, 4);
- reg += 4;
- *(reg++) = 0;
- memcpy(reg, line, delim);
- reg += delim;
- *(reg++) = 0;
-
- line = cc;
- if (delim) line += 2;
- } else if (c == 'y') {
- char *s, delim = 0;
- int len;
-
- if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
- corwin->arg1 = reg-(char *)corwin;
- len = strlen(s);
- reg = extend_string((void *)&corwin, s, reg-(char *)corwin, len);
- free(s);
- corwin->arg2 = reg-(char *)corwin;
- if (!(s = unescape_delimited_string(&line, &delim, 0))) goto brand;
- if (len != strlen(s)) goto brand;
- reg = extend_string((void *)&corwin, s, reg-(char*)corwin, len);
- free(s);
- } else if (strchr("abcirtTw:", c)) {
- int end, class;
-
- // Trim whitespace from "b ;" and ": blah " but only first space in "w x "
-
- while (isspace(*line)) {
- if (!strchr("btT", c) || *line != '\n') line++;
- else break;
- }
-append:
- class = !strchr("btT:", c);
- end = strcspn(line, class ? "\n" : "; \t\r\n\v\f");
-
- if (!end) {
- if (!strchr("btT", c)) break;
- continue;
- }
-
- // Extend allocation to include new string. We use offsets instead of
- // pointers so realloc() moving stuff doesn't break things. Do it
- // here instead of toybuf so there's no maximum size.
- if (!corwin->arg1) corwin->arg1 = reg - (char*)corwin;
- reg = extend_string((void *)&corwin, line, reg - (char *)corwin, end);
- line += end;
-
- // Line continuation? (Two slightly different input methods, -e with
- // embedded newline vs -f line by line. Must parse both correctly.)
- if (class && line[-1] == '\\') {
- reg[-2] = 0;
- if (*line && line[1]) {
- reg -= 2;
- line++;
- goto append;
- } else corwin->hit++;
- }
-
- // Commands that take no arguments
- } else if (!strchr("{dDgGhHlnNpPqx=", c)) break;
- }
-
-brand:
- // Reminisce about chestnut trees.
- error_exit("bad pattern '%s'@%ld (%c)", errstart, line-errstart+1, *line);
-}
-
-void sed_main(void)
-{
- struct arg_list *dworkin;
- char **args = toys.optargs;
-
- // Lie to autoconf when it asks stupid questions, so configure regexes
- // that look for "GNU sed version %f" greater than some old buggy number
- // don't fail us for not matching their narrow expectations.
- if (toys.optflags & FLAG_version) {
- xprintf("This is not GNU sed version 9.0\n");
- return;
- }
-
- // Need a pattern. If no unicorns about, fight serpent and take its eye.
- if (!TT.e && !TT.f) {
- if (!*toys.optargs) error_exit("no pattern");
- (TT.e = xzalloc(sizeof(struct arg_list)))->arg = *(args++);
- }
-
- // Option parsing infrastructure can't interlace "-e blah -f blah -e blah"
- // so handle all -e, then all -f. (At least the behavior's consistent.)
-
- for (dworkin = TT.e; dworkin; dworkin = dworkin->next)
- jewel_of_judgement(&dworkin->arg, strlen(dworkin->arg));
- for (dworkin = TT.f; dworkin; dworkin = dworkin->next)
- do_lines(xopen(dworkin->arg, O_RDONLY), dworkin->arg, jewel_of_judgement);
- dlist_terminate(TT.pattern);
- if (TT.nextlen) error_exit("no }");
-
- TT.fdout = 1;
- TT.remember = xstrdup("");
-
- // Inflict pattern upon input files
- loopfiles_rw(args, O_RDONLY, 0, 0, do_sed);
-
- if (!(toys.optflags & FLAG_i)) walk_pattern(0, 0);
-
- // todo: need to close fd when done for TOYBOX_FREE?
-}