diff options
author | Elliott Hughes <enh@google.com> | 2019-02-12 16:29:09 -0800 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2019-02-12 21:31:20 -0600 |
commit | 7079a558d6e9448d5434965985b650c1e572140e (patch) | |
tree | 5421ca8fe368722a744a7a5c906b05a6a5780c55 | |
parent | 8326fe1f7e141f49a46a57acc6221651298b42c9 (diff) | |
download | toybox-7079a558d6e9448d5434965985b650c1e572140e.tar.gz |
sed: fix substitution of empty capturing groups.
The test for \N where N was larger than the number of capturing groups
in the regular expression was incorrect, and fired for cases such as
matching __(ARM_)?NR_([a-z]*) against __NR_read, where the first group is
empty (because it failed to match) but the second group did match "read".
Use regex_t's re_nsub for the error check, and treat rm_so == -1 as a
signal to just copy nothing into the result.
(Found trying to build minijail in AOSP.)
-rwxr-xr-x | tests/sed.test | 11 | ||||
-rw-r--r-- | toys/posix/sed.c | 9 |
2 files changed, 17 insertions, 3 deletions
diff --git a/tests/sed.test b/tests/sed.test index f2ff8fd7..34dfa161 100755 --- a/tests/sed.test +++ b/tests/sed.test @@ -72,6 +72,8 @@ testing "aci" \ "sed -e '3a boom' -e '/hre/i bang' -e '3a whack' -e '3c bong'" \ "one\ntwo\nbang\nbong\nboom\nwhack\nfour\n" "" \ "one\ntwo\nthree\nfour\n" +# TODO: next test is broken on new-ish debian/bash with never-ending +# output of `sed: short write: Broken pipe`. testing "b loop" "sed ':woo;=;b woo' | head -n 5" '1\n1\n1\n1\n1\n' "" "X" testing "b skip" "sed -n '2b zap;d;:zap;p'" "two\n" "" "one\ntwo\nthree" testing "b end" "sed -n '2b;p'" "one\nthree" "" "one\ntwo\nthree" @@ -167,4 +169,13 @@ testing "end b with }" "sed -n '/START/{:a;n;/END/q;p;ba}'" "b\nc\n" \ testing '-z' 'sed -z "s/\n/-/g"' "a-b-c" "" "a\nb\nc" +# toybox handling of empty capturing groups broke minjail. Check that we +# correctly replace an empty capturing group with the empty string: +testing '\n with empty capture' \ + 'sed -E "s/(ARM_)?(NR_)([a-z]*) (.*)/\1\2\3/"' "NR_read" "" "NR_read foo" +# ...but also that we report an error for a backreference to a group that +# isn't in the pattern: +testing '\n too high' \ + 'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo" + # -i with $ last line test diff --git a/toys/posix/sed.c b/toys/posix/sed.c index 228055f9..0be2165e 100644 --- a/toys/posix/sed.c +++ b/toys/posix/sed.c @@ -528,15 +528,18 @@ static void sed_line(char **pline, long plen) rswap[mlen-1] = new[off]; continue; - } else if (match[cc].rm_so == -1) error_exit("no s//\\%d/", cc); + } else if (cc > reg->re_nsub) error_exit("no s//\\%d/", cc); } else if (new[off] != '&') { rswap[mlen++] = new[off]; continue; } - ll = match[cc].rm_eo-match[cc].rm_so; - memcpy(rswap+mlen, rline+match[cc].rm_so, ll); + if (match[cc].rm_so == -1) ll = 0; // Empty match. + else { + ll = match[cc].rm_eo-match[cc].rm_so; + memcpy(rswap+mlen, rline+match[cc].rm_so, ll); + } mlen += ll; } |