Remove CONFIG_TOYBOX_I18N and just always support utf8.

author: Rob Landley <rob@landley.net> 2020-12-06 00:02:46 -0600
committer: Rob Landley <rob@landley.net> 2020-12-06 00:02:46 -0600
commit: 75b89012c90470a27d66b54ad89901b94fcfd169 (patch)
tree: 07c1b6954a186511f016ca490c71ae0fbd2873f3 /lib
parent: 0e675d98f3aba7a834dc56c2ae419f20da376f44 (diff)
download: toybox-75b89012c90470a27d66b54ad89901b94fcfd169.tar.gz
1 file changed, 29 insertions, 25 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 413d0f73..7786fcc1 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -395,37 +395,41 @@ int utf8towc(wchar_t *wc, char *str, unsigned len)
   return str-s;
 }
 
+// Convert string to lower case, utf8 aware.
 char *strlower(char *s)
 {
   char *try, *new;
+  int len, mlen = (strlen(s)|7)+9;
+  wchar_t c;
 
-  if (!CFG_TOYBOX_I18N) {
-    try = new = xstrdup(s);
-    for (; *s; s++) *(new++) = tolower(*s);
-  } else {
-    // I can't guarantee the string _won't_ expand during reencoding, so...?
-    try = new = xmalloc(strlen(s)*2+1);
-
-    while (*s) {
-      wchar_t c;
-      int len = utf8towc(&c, s, MB_CUR_MAX);
-
-      if (len < 1) *(new++) = *(s++);
-      else {
-        s += len;
-        // squash title case too
-        c = towlower(c);
-
-        // if we had a valid utf8 sequence, convert it to lower case, and can't
-        // encode back to utf8, something is wrong with your libc. But just
-        // in case somebody finds an exploit...
-        len = wcrtomb(new, c, 0);
-        if (len < 1) error_exit("bad utf8 %x", (int)c);
-        new += len;
-      }
+  try = new = xmalloc(mlen);
+
+  while (*s) {
+
+    if (1>(len = utf8towc(&c, s, MB_CUR_MAX))) {
+      *(new++) = *(s++);
+
+      continue;
     }
-    *new = 0;
+
+    s += len;
+    // squash title case too
+    c = towlower(c);
+
+    // if we had a valid utf8 sequence, convert it to lower case, and can't
+    // encode back to utf8, something is wrong with your libc. But just
+    // in case somebody finds an exploit...
+    len = wcrtomb(new, c, 0);
+    if (len < 1) error_exit("bad utf8 %x", (int)c);
+    new += len;
+
+    // Case conversion can expand utf8 representation, but with extra mlen
+    // space above we should basically never need to realloc
+    if (mlen+4 > (len = new-try)) continue;
+    try = xrealloc(try, mlen = len+16);
+    new = try+len;
   }
+  *new = 0;
 
   return try;
 }
author	Rob Landley <rob@landley.net>	2020-12-06 00:02:46 -0600
committer	Rob Landley <rob@landley.net>	2020-12-06 00:02:46 -0600
commit	75b89012c90470a27d66b54ad89901b94fcfd169 (patch)
tree	07c1b6954a186511f016ca490c71ae0fbd2873f3 /lib
parent	0e675d98f3aba7a834dc56c2ae419f20da376f44 (diff)
download	toybox-75b89012c90470a27d66b54ad89901b94fcfd169.tar.gz