diff options
author | Rob Landley <rob@landley.net> | 2017-09-02 20:40:24 -0500 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2017-09-02 20:40:24 -0500 |
commit | 6e766936396e2da7fb3820cadb3a9ae823caa9a8 (patch) | |
tree | 2f451efca683de7858d201fb8de7b3719f7429df /lib/lib.c | |
parent | 67ddade3373d0fefeff25b48430e5f08c3a7711b (diff) | |
download | toybox-6e766936396e2da7fb3820cadb3a9ae823caa9a8.tar.gz |
utf8towc() has to be in lib.c if strlower() is going to use it, because
scripts/*.c builds against lib.c but not linestack.c.
Diffstat (limited to 'lib/lib.c')
-rw-r--r-- | lib/lib.c | 34 |
1 files changed, 33 insertions, 1 deletions
```diff
@@ -335,6 +335,38 @@ int stridx(char *haystack, char needle)
   return off-haystack;
 }
 
+// Convert utf8 sequence to a unicode wide character
+int utf8towc(wchar_t *wc, char *str, unsigned len)
+{
+  unsigned result, mask, first;
+  char *s, c;
+
+  // fast path ASCII
+  if (len && *str<128) return !!(*wc = *str);
+
+  result = first = *(s = str++);
+  for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) {
+    if (mask>21) return -1;
+    if (!--len) return -2;
+    c = *(str++);
+    if ((c&0xc0) != 0x80) return -1;
+    result = (result<<6)|(c&0x3f);
+  }
+  result &= (1<<mask)-1;
+  c = str-s;
+  if (mask==6) return -1;
+
+  // Avoid overlong encodings
+  if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2])
+    return -1;
+
+  // Limit unicode so it can't encode anything UTF-16 can't.
+  if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;
+  *wc = result;
+
+  return str-s;
+}
+
 char *strlower(char *s)
 {
   char *try, *new;
@@ -348,7 +380,7 @@ char *strlower(char *s)
 
   while (*s) {
     wchar_t c;
-    int len = mbrtowc(&c, s, MB_CUR_MAX, 0);
+    int len = utf8towc(&c, s, MB_CUR_MAX);
 
     if (len < 1) *(new++) = *(s++);
     else {
```