diff options
| -rw-r--r-- | toys/posix/sed.c | 179 | 
1 files changed, 64 insertions, 115 deletions
diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 0896959f..c51585a6 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -10,6 +10,12 @@   * TODO: handle error return from emit(), error_msg/exit consistently   *       What's the right thing to do for -i when write fails? Skip to next?   * test '//q' with no previous regex, also repeat previous regex? + * + * Deviations from POSIX: allow extended regular expressions with -r, + * editing in place with -i, separate with -s, NUL-separated input with -z, + * printf escapes in text, line continuations, semicolons after all commands, + * 2-address anywhere an address is allowed, "T" command, multiline + * continuations for [abc], \; to end [abc] argument before end of line.  USE_SED(NEWTOY(sed, "(help)(version)e*f*i:;nErz(null-data)[+Er]", TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLAG_NOHELP)) @@ -19,8 +25,7 @@ config SED    help      usage: sed [-inrzE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...] -    Stream editor. Apply one or more editing SCRIPTs to each line of input -    (from FILE or stdin) producing output (by default to stdout). +    Stream editor. Apply editing SCRIPTs to lines of input.      -e	Add SCRIPT to list      -f	Add contents of SCRIPT_FILE to list @@ -29,144 +34,88 @@ config SED      -r	Use extended regular expression syntax      -E	POSIX alias for -r      -s	Treat input files separately (implied by -i) -    -z	Use \0 rather than \n as the input line separator +    -z	Use \0 rather than \n as input line separator -    A SCRIPT is a series of one or more COMMANDs separated by newlines or -    semicolons. All -e SCRIPTs are concatenated together as if separated -    by newlines, followed by all lines from -f SCRIPT_FILEs, in order. -    If no -e or -f SCRIPTs are specified, the first argument is the SCRIPT. +    A SCRIPT is one or more COMMANDs separated by newlines or semicolons. +    All -e SCRIPTs are combined as if separated by newlines, followed by all -f +    SCRIPT_FILEs. 
If no -e or -f then first argument is the SCRIPT. -    Each COMMAND may be preceded by an address which limits the command to -    apply only to the specified line(s). Commands without an address apply to -    every line. Addresses are of the form: +    COMMANDs apply to every line unless prefixed with an ADDRESS of the form:        [ADDRESS[,ADDRESS]][!]COMMAND -    The ADDRESS may be a decimal line number (starting at 1), a /regular -    expression/ within a pair of forward slashes, or the character "$" which -    matches the last line of input. (In -s or -i mode this matches the last -    line of each file, otherwise just the last line of the last file.) A single -    address matches one line, a pair of comma separated addresses match -    everything from the first address to the second address (inclusive). If -    both addresses are regular expressions, more than one range of lines in -    each file can match. The second address can be +N to end N lines later. - -    REGULAR EXPRESSIONS in sed are started and ended by the same character -    (traditionally / but anything except a backslash or a newline works). -    Backslashes may be used to escape the delimiter if it occurs in the -    regex, and for the usual printf escapes (\abcefnrtv and octal, hex, -    and unicode). An empty regex repeats the previous one. ADDRESS regexes -    (above) require the first delimiter to be escaped with a backslash when -    it isn't a forward slash (to distinguish it from the COMMANDs below). - -    Sed mostly operates on individual lines one at a time. It reads each line, -    processes it, and either writes it to the output or discards it before -    reading the next line. Sed can remember one additional line in a separate -    buffer (using the h, H, g, G, and x commands), and can read the next line -    of input early (using the n and N command), but other than that command -    scripts operate on individual lines of text. 
- -    Each COMMAND starts with a single character. The following commands take -    no arguments: - -      !  Run this command when the test _didn't_ match. - -      {  Start a new command block, continuing until a corresponding "}". -         Command blocks may nest. If the block has an address, commands within -         the block are only run for lines within the block's address range. - -      }  End command block (this command cannot have an address) +    ADDRESS is a line number (starting at 1), a /REGULAR EXPRESSION/, or $ for +    last line (-s or -i makes it last line of each file). One address matches one +    line, ADDRESS,ADDRESS matches from first to second inclusive. Two regexes can +    match multiple ranges. ADDRESS,+N ends N lines later. ! inverts the match. + +    REGULAR EXPRESSIONS start and end with the same character (anything but +    backslash or newline). To use the delimiter in the regex escape it with a +    backslash, and printf escapes (\abcefnrtv and octal, hex, and unicode) work. +    An empty regex repeats the previous one. ADDRESS regexes require any +    first delimiter except / to be \escaped to distinguish it from COMMANDs. + +    Sed reads each line of input, processes it, and writes it out or discards it +    before reading the next. Sed can remember one additional line in a separate +    buffer (the h, H, g, G, and x commands), and can read the next line of input +    early (the n and N commands), but otherwise operates on individual lines. + +    Each COMMAND starts with a single character. Commands with no arguments are: +      !  Run this command when the ADDRESS _didn't_ match. +      {  Start new command block, continuing until a corresponding "}". +         Command blocks nest and can have ADDRESSes applying to the whole block. 
+      }  End command block (this COMMAND cannot have an address)        d  Delete this line and move on to the next one           (ignores remaining COMMANDs) -        D  Delete one line of input and restart command SCRIPT (same as "d"           unless you've glued lines together with "N" or similar) -        g  Get remembered line (overwriting current line) -        G  Get remembered line (appending to current line) -        h  Remember this line (overwriting remembered line) -        H  Remember this line (appending to remembered line, if any) - -      l  Print line, escaping \abfrtv (but not newline), octal escaping other -         nonprintable characters, wrapping lines to terminal width with a -         backslash, and appending $ to actual end of line. - -      n  Print default output and read next line, replacing current line -         (If no next line available, quit processing script) - -      N  Append next line of input to this line, separated by a newline -         (This advances the line counter for address matching and "=", if no -         next line available quit processing script without default output) - +      l  Print line escaping \abfrtv (but not \n), octal escape other nonprinting +         chars, wrap lines to terminal width with \, append $ to end of line. +      n  Print default output and read next line over current line (quit at EOF) +      N  Append \n and next line of input to this line. Quit at EOF without +         default output. Advances line counter for ADDRESS and "=".        p  Print this line -        P  Print this line up to first newline (from "N") -        q  Quit (print default output, no more commands processed or lines read) -        x  Exchange this line with remembered line (overwrite in both directions) - -      =  Print the current line number (followed by a newline) - -    The following commands (may) take an argument. 
The "text" arguments (to -    the "a", "b", and "c" commands) may end with an unescaped "\" to append -    the next line (for which leading whitespace is not skipped), and also -    treat ";" as a literal character (use "\;" instead). - -      a [text]   Append text to output before attempting to read next line - -      b [label]  Branch, jumps to :label (or with no label, to end of SCRIPT) - -      c [text]   Delete line, output text at end of matching address range -                 (ignores remaining COMMANDs) - -      i [text]   Print text - -      r [file]   Append contents of file to output before attempting to read -                 next line. - -      s/S/R/F    Search for regex S, replace matched text with R using flags F. -                 The first character after the "s" (anything but newline or -                 backslash) is the delimiter, escape with \ to use normally. - -                 The replacement text may contain "&" to substitute the matched -                 text (escape it with backslash for a literal &), or \1 through -                 \9 to substitute a parenthetical subexpression in the regex. -                 You can also use the normal backslash escapes such as \n and -                 a backslash at the end of the line appends the next line. 
- -                 The flags are: - -                 [0-9]    A number, substitute only that occurrence of pattern -                 g        Global, substitute all occurrences of pattern +      =  Print the current line number (plus newline) +      #  Comment, ignores rest of this line of SCRIPT (until newline) + +    Commands that take an argument:  + +      : LABEL    Target for jump commands +      a TEXT     Append text to output before reading next line +      b LABEL    Branch, jumps to :LABEL (with no LABEL to end of SCRIPT) +      c TEXT     Delete matching ADDRESS range and output TEXT instead +      i TEXT     Insert text (output immediately) +      r FILE     Append contents of FILE to output before reading next line. +      s/S/R/F    Search for regex S replace match with R using flags F. Delimiter +                 is anything but \n or \, escape with \ to use in S or R. Printf +                 escapes work. Unescaped & in R becomes full matched text, \1 +                 through \9 = parenthetical subexpression from S. \ at end of +                 line appends next line of SCRIPT. The flags in F are: +                 [0-9]    A number N, substitute only Nth match +                 g        Global, substitute all matches                   i/I      Ignore case when matching -                 p        Print the line if match was found and replaced -                 w [file] Write (append) line to file if match replaced - -      t [label]  Test, jump to :label only if an "s" command found a match in -                 this line since last test (replacing with same text counts) - -      T [label]  Test false, jump only if "s" hasn't found a match. 
- -      w [file]   Write (append) line to file - +                 p        Print resulting line when match found and replaced +                 w [file] Write (append) line to file when match replaced +      t LABEL    Test, jump if s/// command matched this line since last test  +      T LABEL    Test false, jump to :LABEL only if no s/// found a match +      w FILE     Write (append) line to file        y/old/new/ Change each character in 'old' to corresponding character                   in 'new' (with standard backslash escapes, delimiter can be                   any repeated character except \ or \n) -      : [label]  Labeled target for jump commands +    The TEXT arguments (to a c i) may end with an unescaped "\" to append +    the next line (leading whitespace is not skipped), and treat ";" as a +    literal character (use "\;" instead). -      #  Comment, ignore rest of this line of SCRIPT -    Deviations from POSIX: allow extended regular expressions with -r, -    editing in place with -i, separate with -s, NUL-separated input with -z, -    printf escapes in text, line continuations, semicolons after all commands, -    2-address anywhere an address is allowed, "T" command, multiline -    continuations for [abc], \; to end [abc] argument before end of line.  */  #define FOR_sed  | 
