about | summary | refs | log | tree | commit | diff
path: root/lib
diff options
context:
space:
mode:
author: Rob Landley <rob@landley.net> 2017-09-05 02:36:24 -0500
committer: Rob Landley <rob@landley.net> 2017-09-05 02:36:24 -0500
commit: b3e70932b6534e603b03d28c45133b8f991f48fe (patch)
tree: da3eff855103f5efd8ae82302bcd8b941c3611c7 /lib
parent: 6e766936396e2da7fb3820cadb3a9ae823caa9a8 (diff)
download: toybox-b3e70932b6534e603b03d28c45133b8f991f48fe.tar.gz
Tweak utf8towc() to return -1 earlier sometimes (instead of -2), and add test
program to compare against libc output.
Diffstat (limited to 'lib')
-rw-r--r-- lib/lib.c 9
1 files changed, 3 insertions, 6 deletions
diff --git a/lib/lib.c b/lib/lib.c
index c482dcab..a4b7229b 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -345,20 +345,17 @@ int utf8towc(wchar_t *wc, char *str, unsigned len)
if (len && *str<128) return !!(*wc = *str);
result = first = *(s = str++);
+ if (result<0xc2 || result>0xf4) return -1;
for (mask = 6; (first&0xc0)==0xc0; mask += 5, first <<= 1) {
- if (mask>21) return -1;
if (!--len) return -2;
- c = *(str++);
- if ((c&0xc0) != 0x80) return -1;
+ if (((c = *(str++))&0xc0) != 0x80) return -1;
result = (result<<6)|(c&0x3f);
}
result &= (1<<mask)-1;
c = str-s;
- if (mask==6) return -1;
// Avoid overlong encodings
- if (mask==6 || mask>21 || result<(unsigned []){0x80,0x800,0x10000}[c-2])
- return -1;
+ if (result<(unsigned []){0x80,0x800,0x10000}[c-2]) return -1;
// Limit unicode so it can't encode anything UTF-16 can't.
if (result>0x10ffff || (result>=0xd800 && result<=0xdfff)) return -1;