From 337c072ac09a94a4a47d81cb214b3b8e55eee621 Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Tue, 6 Sep 2016 00:14:24 -0500 Subject: Teach sed s/// how to handle [:space:] type sequences. Or more accurately, s@[[:space:]@]@replace@ which can't treat the @ in [] as a delimiter but has to know about nested [[]] to make that decision. --- tests/sed.test | 2 ++ toys/posix/sed.c | 21 ++++++++++++++++----- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/tests/sed.test b/tests/sed.test index d25148ed..f48b73e6 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -57,6 +57,8 @@ testing 'multiple regex address match' 'sed -n /on/,/off/p' \ 'zap\nbone\nturtle\scoff\nfred\ntron\nlurid\noffer\nbecause\n' testing 'regex address overlap' 'sed -n /on/,/off/p' "on\nzap\noffon\n" "" \ 'on\nzap\noffon\nping\noff\n' +testing 'getdelim with nested [:blah:]' 'sed -n "sa\a[a[:space:]bc]*aXXagp"' \ + "ABXXCDXXEFXXGHXXIXX" "" "ABaaCDa EFaa aGHa a Ia " # gGhHlnNpPqrstwxy:= # s///#comment diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 744edf7b..31268ece 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -657,6 +657,7 @@ static char *unescape_delimited_string(char **pstr, char *delim) { char *to, *from, mode = 0, d; + // Grab leading delimiter (if necessary), allocate space for new string from = *pstr; if (!delim || !*delim) { if (!(d = *(from++))) return 0; @@ -670,13 +671,23 @@ static char *unescape_delimited_string(char **pstr, char *delim) if (!*from) return 0; // delimiter in regex character range doesn't count - if (!mode && *from == '[') { - mode = '['; - if (from[1]=='-' || from[1]==']') *(to++) = *(from++); - } else if (mode && *from == ']') mode = 0; + if (*from == '[') { + if (!mode) { + mode = ']'; + if (from[1]=='-' || from[1]==']') *(to++) = *(from++); + } else if (mode == ']' && strchr(".=:", from[1])) { + *(to++) = *(from++); + mode = *from; + } + } else if (*from == mode) { + if (mode == ']') mode = 0; + else { + *(to++) = *(from++); + mode = ']'; + } // Length 1 range (X-X with same X) is "undefined" and makes regcomp err, // but the perl build does it, so we need to filter it out. - else if (mode && *from == '-' && from[-1] == from[1]) { + } else if (mode && *from == '-' && from[-1] == from[1]) { from+=2; continue; } else if (*from == '\\') { -- cgit v1.2.3