From 50d8ed89b1e02917e83a33b3d62f465192dd500f Mon Sep 17 00:00:00 2001 From: Elliott Hughes Date: Thu, 3 Dec 2020 16:38:26 -0800 Subject: sed: add 'x' flag to the 's' command. The GNU tar manual, when talking about the `tar --transform` option that I need to implement, describes the 'x' flag by saying "regexp is an extended regular expression (see section 'Extended regular expressions' in GNU sed)". Only it turns out that even the latest GNU sed doesn't actually have that flag. It's unique to `tar --transform`. That link is just telling you that the sed manual will explain extended regular expressions, not that GNU sed also supports the 'x' flag. So I don't know whether we want this in toybox sed after all. (It made sense that sed would have such a flag, but no sed that I know of actually does.) --- tests/sed.test | 2 ++ toys/posix/sed.c | 16 +++++++++------- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/tests/sed.test b/tests/sed.test index beb11d5c..c3928e58 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -185,6 +185,8 @@ testing '\n with empty capture' \ testing '\n too high' \ 'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo" +toyonly testing 's///x' 'sed "s/(hello )?(world)/\2/x"' "world" "" "hello world" + # Performance test X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done testing 'megabyte s/x/y/g (20 sec timeout)' \ diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 8fbef0cb..9bd05034 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -147,7 +147,7 @@ struct sedcmd { int rmatch[2]; // offset of regex struct for prefix matches (/abc/,/def/p) int arg1, arg2, w; // offset of two arguments per command, plus s//w filename unsigned not, hit; - unsigned sflags; // s///flag bits: i=1, g=2, p=4 + unsigned sflags; // s///flag bits: i=1, g=2, p=4, x=8 char c; // action }; @@ -441,7 +441,7 @@ static void sed_line(char **pline, long plen) } else zmatch = 0; // If we're replacing only a specific match, skip if this 
isn't it - off = command->sflags>>3; + off = command->sflags>>4; if (off && off != ++count) { memcpy(l2+l2used, rline, match[0].rm_eo); l2used += match[0].rm_eo; @@ -793,6 +793,7 @@ static void parse_pattern(char **pline, long len) if (!TT.nextlen--) break; } else if (c == 's') { char *end, delim = 0; + int flags; // s/pattern/replacement/flags @@ -845,19 +846,20 @@ resume_s: if (isspace(*line) && *line != '\n') continue; - if (0 <= (l = stridx("igp", *line))) command->sflags |= 1<<l; + if (0 <= (l = stridx("igpx", *line))) command->sflags |= 1<<l; else if (*line == 'I') command->sflags |= 1<<0; - else if (!(command->sflags>>3) && 0<(l = strtol(line, &line, 10))) { - command->sflags |= l << 3; + else if (!(command->sflags>>4) && 0<(l = strtol(line, &line, 10))) { + command->sflags |= l << 4; line--; } else break; } + flags = (FLAG(r) || (command->sflags&8)) ? REG_EXTENDED : 0; + if (command->sflags&1) flags |= REG_ICASE; // We deferred actually parsing the regex until we had the s///i flag // allocating the space was done by extend_string() above if (!*TT.remember) command->arg1 = 0; - else xregcomp((void *)(command->arg1 + (char *)command), TT.remember, - (REG_EXTENDED*!!FLAG(r))|((command->sflags&1)*REG_ICASE)); + else xregcomp((void *)(command->arg1+(char *)command),TT.remember,flags); free(TT.remember); TT.remember = 0; if (*line == 'w') { -- cgit v1.2.3