aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2020-12-06 00:02:46 -0600
committer Rob Landley <rob@landley.net> 2020-12-06 00:02:46 -0600
commit 75b89012c90470a27d66b54ad89901b94fcfd169 (patch)
tree 07c1b6954a186511f016ca490c71ae0fbd2873f3 /lib
parent 0e675d98f3aba7a834dc56c2ae419f20da376f44 (diff)
download toybox-75b89012c90470a27d66b54ad89901b94fcfd169.tar.gz
Remove CONFIG_TOYBOX_I18N and just always support utf8.
Diffstat (limited to 'lib')
-rw-r--r-- lib/lib.c 54
1 files changed, 29 insertions, 25 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 413d0f73..7786fcc1 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -395,37 +395,41 @@ int utf8towc(wchar_t *wc, char *str, unsigned len)
return str-s;
}
+// Convert string to lower case, utf8 aware.
char *strlower(char *s)
{
char *try, *new;
+ int len, mlen = (strlen(s)|7)+9;
+ wchar_t c;
- if (!CFG_TOYBOX_I18N) {
- try = new = xstrdup(s);
- for (; *s; s++) *(new++) = tolower(*s);
- } else {
- // I can't guarantee the string _won't_ expand during reencoding, so...?
- try = new = xmalloc(strlen(s)*2+1);
-
- while (*s) {
- wchar_t c;
- int len = utf8towc(&c, s, MB_CUR_MAX);
-
- if (len < 1) *(new++) = *(s++);
- else {
- s += len;
- // squash title case too
- c = towlower(c);
-
- // if we had a valid utf8 sequence, convert it to lower case, and can't
- // encode back to utf8, something is wrong with your libc. But just
- // in case somebody finds an exploit...
- len = wcrtomb(new, c, 0);
- if (len < 1) error_exit("bad utf8 %x", (int)c);
- new += len;
- }
+ try = new = xmalloc(mlen);
+
+ while (*s) {
+
+ if (1>(len = utf8towc(&c, s, MB_CUR_MAX))) {
+ *(new++) = *(s++);
+
+ continue;
}
- *new = 0;
+
+ s += len;
+ // squash title case too
+ c = towlower(c);
+
+ // if we had a valid utf8 sequence, convert it to lower case, and can't
+ // encode back to utf8, something is wrong with your libc. But just
+ // in case somebody finds an exploit...
+ len = wcrtomb(new, c, 0);
+ if (len < 1) error_exit("bad utf8 %x", (int)c);
+ new += len;
+
+ // Case conversion can expand utf8 representation, but with extra mlen
+ // space above we should basically never need to realloc
+ if (mlen+4 > (len = new-try)) continue;
+ try = xrealloc(try, mlen = len+16);
+ new = try+len;
}
+ *new = 0;
return try;
}