diff options
-rw-r--r-- | Config.in | 6 | ||||
-rw-r--r-- | lib/lib.c | 54 | ||||
-rw-r--r-- | main.c | 9 | ||||
-rw-r--r-- | toys.h | 1 | ||||
-rw-r--r-- | toys/example/demo_utf8towc.c | 1 | ||||
-rw-r--r-- | toys/posix/expand.c | 24 |
6 files changed, 44 insertions, 51 deletions
@@ -106,12 +106,6 @@ config TOYBOX_HELP_DASHDASH optstring. (Use TOYFLAG_NOHELP to disable.) Produces the same output as "help command". --version shows toybox version. -config TOYBOX_I18N - bool "Internationalization support" - default y - help - Support for UTF-8 character sets, and some locale support. - config TOYBOX_FREE bool "Free memory unnecessarily" default n @@ -395,37 +395,41 @@ int utf8towc(wchar_t *wc, char *str, unsigned len) return str-s; } +// Convert string to lower case, utf8 aware. char *strlower(char *s) { char *try, *new; + int len, mlen = (strlen(s)|7)+9; + wchar_t c; - if (!CFG_TOYBOX_I18N) { - try = new = xstrdup(s); - for (; *s; s++) *(new++) = tolower(*s); - } else { - // I can't guarantee the string _won't_ expand during reencoding, so...? - try = new = xmalloc(strlen(s)*2+1); - - while (*s) { - wchar_t c; - int len = utf8towc(&c, s, MB_CUR_MAX); - - if (len < 1) *(new++) = *(s++); - else { - s += len; - // squash title case too - c = towlower(c); - - // if we had a valid utf8 sequence, convert it to lower case, and can't - // encode back to utf8, something is wrong with your libc. But just - // in case somebody finds an exploit... - len = wcrtomb(new, c, 0); - if (len < 1) error_exit("bad utf8 %x", (int)c); - new += len; - } + try = new = xmalloc(mlen); + + while (*s) { + + if (1>(len = utf8towc(&c, s, MB_CUR_MAX))) { + *(new++) = *(s++); + + continue; } - *new = 0; + + s += len; + // squash title case too + c = towlower(c); + + // if we had a valid utf8 sequence, convert it to lower case, and can't + // encode back to utf8, something is wrong with your libc. But just + // in case somebody finds an exploit... + len = wcrtomb(new, c, 0); + if (len < 1) error_exit("bad utf8 %x", (int)c); + new += len; + + // Case conversion can expand utf8 representation, but with extra mlen + // space above we should basically never need to realloc + if (mlen+4 > (len = new-try)) continue; + try = xrealloc(try, mlen = len+16); + new = try+len; } + *new = 0; return try; } @@ -98,11 +98,10 @@ void toy_singleinit(struct toy_list *which, char *argv[]) if (!(which->flags & TOYFLAG_NOFORK)) { toys.old_umask = umask(0); if (!(which->flags & TOYFLAG_UMASK)) umask(toys.old_umask); - if (CFG_TOYBOX_I18N) { - // Deliberately try C.UTF-8 before the user's locale to work around users - // that choose non-UTF-8 locales. macOS doesn't support C.UTF-8 though. - if (!setlocale(LC_CTYPE, "C.UTF-8")) setlocale(LC_CTYPE, ""); - } + + // Try user's locale, falling back to C.UTF-8 + setlocale(LC_CTYPE, ""); + if (!strcmp("UTF-8", nl_langinfo(CODESET))) setlocale(LC_CTYPE, "C.UTF-8"); setlinebuf(stdout); } } @@ -58,6 +58,7 @@ // Internationalization support (also in POSIX and LSB) +#include <langinfo.h> #include <locale.h> #include <wchar.h> #include <wctype.h> diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c index 25737856..c0522543 100644 --- a/toys/example/demo_utf8towc.c +++ b/toys/example/demo_utf8towc.c @@ -6,7 +6,6 @@ USE_DEMO_UTF8TOWC(NEWTOY(demo_utf8towc, 0, TOYFLAG_USR|TOYFLAG_BIN)) config DEMO_UTF8TOWC bool "demo_utf8towc" - depends on TOYBOX_I18N default n help usage: demo_utf8towc diff --git a/toys/posix/expand.c b/toys/posix/expand.c index f1fd8d33..f3cd44d0 100644 --- a/toys/posix/expand.c +++ b/toys/posix/expand.c @@ -43,22 +43,18 @@ static void do_expand(int fd, char *name) } if (!len) break; for (i=0; i<len; i++) { - int width = 1; + wchar_t blah; + int width = utf8towc(&blah, toybuf+i, len-i); char c; - if (CFG_TOYBOX_I18N) { - wchar_t blah; - - width = utf8towc(&blah, toybuf+i, len-i); - if (width > 1) { - if (width != fwrite(toybuf+i, width, 1, stdout)) - perror_exit("stdout"); - i += width-1; - x++; - continue; - } else if (width == -2) break; - else if (width == -1) continue; - } + if (width > 1) { + if (width != fwrite(toybuf+i, width, 1, stdout)) + perror_exit("stdout"); + i += width-1; + x++; + continue; + } else if (width == -2) break; + else if (width == -1) continue; c = toybuf[i]; if (c != '\t') { |