From 411f7fc4781f6758a50ce206df62efd809c20186 Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Sat, 12 Jan 2019 09:30:29 -0800 Subject: sed: add -z. Used to construct SELinux policies in the AOSP build. I left loopfiles_lines with its hard-coded '\n' because although cut(1) also has a -z option, I can't find any case where it's used in any of the codebases searchable by me. (And fmt(1), the other user, doesn't even have the option.) YAGNI. Bug: http://b/122744241 --- lib/lib.c | 6 +++--- lib/lib.h | 2 +- tests/sed.test | 2 ++ toys/posix/sed.c | 25 +++++++++++++++---------- 4 files changed, 21 insertions(+), 14 deletions(-) diff --git a/lib/lib.c b/lib/lib.c index 2ba009f9..b074a92b 100644 --- a/lib/lib.c +++ b/lib/lib.c @@ -656,7 +656,7 @@ void loopfiles(char **argv, void (*function)(int fd, char *name)) static void (*do_lines_bridge)(char **pline, long len); static void loopfile_lines_bridge(int fd, char *name) { - do_lines(fd, do_lines_bridge); + do_lines(fd, '\n', do_lines_bridge); } void loopfiles_lines(char **argv, void (*function)(char **pline, long len)) @@ -1356,7 +1356,7 @@ char *getgroupname(gid_t gid) // the line pointer if they want to keep it, or 1 to terminate processing, // otherwise line is freed. Passed file descriptor is closed at the end. // At EOF calls function(0, 0) -void do_lines(int fd, void (*call)(char **pline, long len)) +void do_lines(int fd, char delim, void (*call)(char **pline, long len)) { FILE *fp = fd ? xfdopen(fd, "r") : stdin; @@ -1364,7 +1364,7 @@ void do_lines(int fd, void (*call)(char **pline, long len)) char *line = 0; ssize_t len; - len = getline(&line, (void *)&len, fp); + len = getdelim(&line, (void *)&len, delim, fp); if (len > 0) { call(&line, len); if (line == (void *)1) break; diff --git a/lib/lib.h b/lib/lib.h index 3e6838ea..578a99c9 100644 --- a/lib/lib.h +++ b/lib/lib.h @@ -253,7 +253,7 @@ int regexec0(regex_t *preg, char *string, long len, int nmatch, regmatch_t pmatch[], int eflags); char *getusername(uid_t uid); char *getgroupname(gid_t gid); -void do_lines(int fd, void (*call)(char **pline, long len)); +void do_lines(int fd, char delim, void (*call)(char **pline, long len)); long environ_bytes(); long long millitime(void); char *format_iso_time(char *buf, size_t len, struct timespec *ts); diff --git a/tests/sed.test b/tests/sed.test index a7d5a6eb..f2ff8fd7 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -165,4 +165,6 @@ testing "bonus backslashes" \ testing "end b with }" "sed -n '/START/{:a;n;/END/q;p;ba}'" "b\nc\n" \ "" "a\nSTART\nb\nc\nEND\nd" +testing '-z' 'sed -z "s/\n/-/g"' "a-b-c" "" "a\nb\nc" + # -i with $ last line test diff --git a/toys/posix/sed.c b/toys/posix/sed.c index bab6fb00..175e05d6 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -11,13 +11,13 @@ * What's the right thing to do for -i when write fails? Skip to next? * test '//q' with no previous regex, also repeat previous regex? -USE_SED(NEWTOY(sed, "(help)(version)e*f*i:;nEr[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLAG_NOHELP)) +USE_SED(NEWTOY(sed, "(help)(version)e*f*i:;nErz(null-data)[+Er]", TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LOCALE|TOYFLAG_NOHELP)) config SED bool "sed" default y help - usage: sed [-inrE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...] + usage: sed [-inrzE] [-e SCRIPT]...|SCRIPT [-f SCRIPT_FILE]... [FILE...] Stream editor. Apply one or more editing SCRIPTs to each line of input (from FILE or stdin) producing output (by default to stdout). @@ -27,8 +27,9 @@ config SED -i Edit each file in place (-iEXT keeps backup file with extension EXT) -n No default output (use the p command to output matched lines) -r Use extended regular expression syntax - -E Alias for -r + -E POSIX alias for -r -s Treat input files separately (implied by -i) + -z Use \0 rather than \n as the input line separator A SCRIPT is a series of one or more COMMANDs separated by newlines or semicolons. All -e SCRIPTs are concatenated together as if separated @@ -159,11 +160,11 @@ config SED # Comment, ignore rest of this line of SCRIPT - Deviations from posix: allow extended regular expressions with -r, - editing in place with -i, separate with -s, printf escapes in text, line - continuations, semicolons after all commands, 2-address anywhere an - address is allowed, "T" command, multiline continuations for [abc], - \; to end [abc] argument before end of line. + Deviations from POSIX: allow extended regular expressions with -r, + editing in place with -i, separate with -s, NUL-separated input with -z, + printf escapes in text, line continuations, semicolons after all commands, + 2-address anywhere an address is allowed, "T" command, multiline + continuations for [abc], \; to end [abc] argument before end of line. */ #define FOR_sed @@ -181,6 +182,7 @@ GLOBALS( long nextlen, rememberlen, count; int fdout, noeol; unsigned xx; + char delim; ) // Linked list of parsed sed commands. Offset fields indicate location where @@ -639,7 +641,7 @@ static void do_sed_file(int fd, char *name) for (command = (void *)TT.pattern; command; command = command->next) command->hit = 0; } - do_lines(fd, sed_line); + do_lines(fd, TT.delim, sed_line); if (FLAG(i)) { if (TT.i && *TT.i) { char *s = xmprintf("%s%s", name, TT.i); @@ -998,6 +1000,8 @@ void sed_main(void) struct arg_list *al; char **args = toys.optargs; + if (!FLAG(z)) TT.delim = '\n'; + // Lie to autoconf when it asks stupid questions, so configure regexes // that look for "GNU sed version %f" greater than some old buggy number // don't fail us for not matching their narrow expectations. @@ -1022,7 +1026,8 @@ void sed_main(void) for (al = TT.e; al; al = al->next) parse_pattern(&al->arg, strlen(al->arg)); parse_pattern(0, 0); - for (al = TT.f; al; al = al->next) do_lines(xopenro(al->arg), parse_pattern); + for (al = TT.f; al; al = al->next) + do_lines(xopenro(al->arg), TT.delim, parse_pattern); dlist_terminate(TT.pattern); if (TT.nextlen) error_exit("no }"); -- cgit v1.2.3