diff options
author | Rob Landley <rob@landley.net> | 2017-09-05 02:36:24 -0500 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2017-09-05 02:36:24 -0500 |
commit | b3e70932b6534e603b03d28c45133b8f991f48fe (patch) | |
tree | da3eff855103f5efd8ae82302bcd8b941c3611c7 /lib/lib.c | |
parent | 6e766936396e2da7fb3820cadb3a9ae823caa9a8 (diff) | |
download | toybox-b3e70932b6534e603b03d28c45133b8f991f48fe.tar.gz |
Tweak utf8towc() to return -1 earlier sometimes (instead of -2), and add test
program to compare against libc output.
Diffstat (limited to 'lib/lib.c')
-rw-r--r-- | lib/lib.c | 9 |
1 file changed, 3 insertions, 6 deletions
@@ -345,20 +345,17 @@ int utf8towc(wchar_t *wc, char *str, unsigned len) if (len && *str<128) return !!(*wc = *str); result = first = *(s = str++); + if (result<0xc2 || result>0xf4) return -1; for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) { - if (mask>21) return -1; if (!--len) return -2; - c = *(str++); - if ((c&0xc0) != 0x80) return -1; + if (((c = *(str++))&0xc0) != 0x80) return -1; result = (result<<6)|(c&0x3f); } result &= (1<<mask)-1; c = str-s; - if (mask==6) return -1; // Avoid overlong encodings - if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2]) - return -1; + if (result<(unsigned []){0x80,0x800,0x10000}[c-2]) return -1; // Limit unicode so it can't encode anything UTF-16 can't. if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1; |