From ed3d5eb0eaf74e6686bc2576b2c4d5a5343dfd57 Mon Sep 17 00:00:00 2001
From: Elliott Hughes
Date: Sat, 5 Dec 2020 17:41:39 -0800
Subject: unicode: new toy.

Based loosely on the Plan9/Inferno utility, and a convenient way to go
back and forth between code points and utf8 sequences.

This patch also fixes a couple of bugs in wctoutf8 (and the tests for
this toy effectively serve as unit tests for wctoutf8/utf8towc).
---
 lib/lib.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

(limited to 'lib')

diff --git a/lib/lib.c b/lib/lib.c
index 7786fcc1..3129e3e3 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -349,17 +349,18 @@ int stridx(char *haystack, char needle)
 // Convert wc to utf8, returning bytes written. Does not null terminate.
 int wctoutf8(char *s, unsigned wc)
 {
-  int len = (wc>0x7ff)+(wc>0xffff), mask = 12+len+!!len;
+  int len = (wc>0x7ff)+(wc>0xffff), i;
 
   if (wc<128) {
     *s = wc;
     return 1;
   } else {
+    i = len;
     do {
-      s[1+len] = 0x80+(wc&0x3f);
-      wc >>= 7;
-    } while (len--);
-    *s = wc|mask;
+      s[1+i] = 0x80+(wc&0x3f);
+      wc >>= 6;
+    } while (i--);
+    *s = (((signed char) 0x80) >> (len+1)) | wc;
   }
 
   return 2+len;
-- 
cgit v1.2.3