diff options
author | Rob Landley <rob@landley.net> | 2019-05-06 13:16:24 -0500 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2019-05-06 13:16:24 -0500 |
commit | 48162c4ee3fb013c09cecea52c6403a33526f172 (patch) | |
tree | 26df11e4431a09f43784886fcc6199f2ec681b8a | |
parent | eb318d5b03223a8f2c5641ee45f9b28647bd3f47 (diff) | |
download | toybox-48162c4ee3fb013c09cecea52c6403a33526f172.tar.gz |
Greatly simplify and speed up regexec0() using REG_STARTEND.
This is a 15-year-old FreeBSD extension (presumably thus also available on
macOS) that glibc adopted in 2004, uClibc adopted in 2005, and bionic
supports. The only libc that DOESN'T support it is musl, once again
because its maintainer explicitly decided not to
(https://www.openwall.com/lists/musl/2013/01/15/26), so add an #ifndef
fallback (defining REG_STARTEND as 0) to let musl stay uniquely broken.
(On musl, matching will stop at the first NUL; everything else can match NULs.)
Finally fixes the "s/x/y/g on a megabyte line of x's takes forever" issue.
-rw-r--r-- | lib/lib.c | 37 | ||||
-rw-r--r-- | lib/portability.h | 3 | ||||
-rwxr-xr-x | tests/sed.test | 6 |
3 files changed, 16 insertions, 30 deletions
```diff
@@ -1317,39 +1317,16 @@ int readlink0(char *path, char *buf, int len)
   return readlinkat0(AT_FDCWD, path, buf, len);
 }
 
-// Do regex matching handling embedded NUL bytes in string (hence extra len
-// argument). Note that neither the pattern nor the match can currently include
-// NUL bytes (even with wildcards) and string must be null terminated at
-// string[len]. But this can find a match after the first NUL.
+// Do regex matching with len argument to handle embedded NUL bytes in string
 int regexec0(regex_t *preg, char *string, long len, int nmatch,
-  regmatch_t pmatch[], int eflags)
+  regmatch_t *pmatch, int eflags)
 {
-  char *s = string;
+  regmatch_t backup;
 
-  for (;;) {
-    int rc = regexec(preg, s, nmatch, pmatch, eflags);
-
-    // check for match
-    if (!rc) {
-      for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
-        pmatch[rc].rm_so += s-string;
-        pmatch[rc].rm_eo += s-string;
-      }
-
-      return 0;
-    }
-
-    // advance past NUL bytes and try again
-    while (len && *s) {
-      s++;
-      len--;
-    }
-    while (len && !*s) {
-      s++;
-      len--;
-    }
-    if (!len) return REG_NOMATCH;
-  }
+  if (!nmatch) pmatch = &backup;
+  pmatch->rm_so = 0;
+  pmatch->rm_eo = len;
+  return regexec(preg, string, nmatch, pmatch, eflags|REG_STARTEND);
 }
 
 // Return user name or string representation of number, returned buffer
diff --git a/lib/portability.h b/lib/portability.h
index 96458266..ccb1b1c5 100644
--- a/lib/portability.h
+++ b/lib/portability.h
@@ -6,6 +6,9 @@
 
 // For musl
 #define _ALL_SOURCE
+#ifndef REG_STARTEND
+#define REG_STARTEND 0
+#endif
 
 #ifdef __APPLE__
 // macOS 10.13 doesn't have the POSIX 2008 direct access to timespec in
diff --git a/tests/sed.test b/tests/sed.test
index 6b27fff8..e5ec11bd 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -176,4 +176,10 @@ testing '\n with empty capture' \
 testing '\n too high' \
   'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
 
+# Performance test
+X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done
+testing 'megabyte s/x/y/g (5 sec timeout)' "timeout 5 sed 's/x/y/g' | sha1sum" \
+  '138c1fa7c3f64186203b0192fb4abdb33cb4e98a -\n' '' "$X\n"
+unset X Y
+
 # -i with $ last line test
```