about summary refs log tree commit diff
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2019-05-06 13:16:24 -0500
committer Rob Landley <rob@landley.net> 2019-05-06 13:16:24 -0500
commit 48162c4ee3fb013c09cecea52c6403a33526f172 (patch)
tree 26df11e4431a09f43784886fcc6199f2ec681b8a
parent eb318d5b03223a8f2c5641ee45f9b28647bd3f47 (diff)
download toybox-48162c4ee3fb013c09cecea52c6403a33526f172.tar.gz
Greatly simplify and speed up regexec0() using REG_STARTEND.
This is a 15-year-old FreeBSD extension (presumably thus also available on macOS) that glibc adopted in 2004, uClibc adopted in 2005, and bionic supports. The only thing that DOESN'T support it is musl, once again because its maintainer explicitly decided not to (https://www.openwall.com/lists/musl/2013/01/15/26), so add an #ifdef to let musl stay uniquely broken. (On musl, matching will stop at the first NUL; everything else can match NULs.) Finally fixes the "s/x/y/g on a megabyte line of x's takes forever" issue.
-rw-r--r-- lib/lib.c 37
-rw-r--r-- lib/portability.h 3
-rwxr-xr-x tests/sed.test 6
3 files changed, 16 insertions, 30 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 6df9566c..9df04906 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -1317,39 +1317,16 @@ int readlink0(char *path, char *buf, int len)
return readlinkat0(AT_FDCWD, path, buf, len);
}
-// Do regex matching handling embedded NUL bytes in string (hence extra len
-// argument). Note that neither the pattern nor the match can currently include
-// NUL bytes (even with wildcards) and string must be null terminated at
-// string[len]. But this can find a match after the first NUL.
+// Do regex matching with len argument to handle embedded NUL bytes in string
int regexec0(regex_t *preg, char *string, long len, int nmatch,
- regmatch_t pmatch[], int eflags)
+ regmatch_t *pmatch, int eflags)
{
- char *s = string;
+ regmatch_t backup;
- for (;;) {
- int rc = regexec(preg, s, nmatch, pmatch, eflags);
-
- // check for match
- if (!rc) {
- for (rc = 0; rc<nmatch && pmatch[rc].rm_so!=-1; rc++) {
- pmatch[rc].rm_so += s-string;
- pmatch[rc].rm_eo += s-string;
- }
-
- return 0;
- }
-
- // advance past NUL bytes and try again
- while (len && *s) {
- s++;
- len--;
- }
- while (len && !*s) {
- s++;
- len--;
- }
- if (!len) return REG_NOMATCH;
- }
+ if (!nmatch) pmatch = &backup;
+ pmatch->rm_so = 0;
+ pmatch->rm_eo = len;
+ return regexec(preg, string, nmatch, pmatch, eflags|REG_STARTEND);
}
// Return user name or string representation of number, returned buffer
diff --git a/lib/portability.h b/lib/portability.h
index 96458266..ccb1b1c5 100644
--- a/lib/portability.h
+++ b/lib/portability.h
@@ -6,6 +6,9 @@
// For musl
#define _ALL_SOURCE
+#ifndef REG_STARTEND
+#define REG_STARTEND 0
+#endif
#ifdef __APPLE__
// macOS 10.13 doesn't have the POSIX 2008 direct access to timespec in
diff --git a/tests/sed.test b/tests/sed.test
index 6b27fff8..e5ec11bd 100755
--- a/tests/sed.test
+++ b/tests/sed.test
@@ -176,4 +176,10 @@ testing '\n with empty capture' \
testing '\n too high' \
'sed -E "s/(.*)/\2/p" 2>/dev/null || echo OK' "OK\n" "" "foo"
+# Performance test
+X=x; Y=20; while [ $Y -gt 0 ]; do X=$X$X; Y=$(($Y-1)); done
+testing 'megabyte s/x/y/g (5 sec timeout)' "timeout 5 sed 's/x/y/g' | sha1sum" \
+ '138c1fa7c3f64186203b0192fb4abdb33cb4e98a -\n' '' "$X\n"
+unset X Y
+
# -i with $ last line test