diff options
author | Elliott Hughes <enh@google.com> | 2020-12-05 17:41:39 -0800 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2020-12-06 02:24:12 -0600 |
commit | ed3d5eb0eaf74e6686bc2576b2c4d5a5343dfd57 (patch) | |
tree | e0116710fb056cee84f021d8b2c230806679b65b /lib | |
parent | 49c02dbe435681015a88c636749d144044fc5e4a (diff) | |
download | toybox-ed3d5eb0eaf74e6686bc2576b2c4d5a5343dfd57.tar.gz |
unicode: new toy.
Based loosely on the Plan 9/Inferno utility, this is a convenient way to go back
and forth between code points and UTF-8 sequences.
This patch also fixes a couple of bugs in wctoutf8 (and the tests for this
toy effectively serve as unit tests for wctoutf8/utf8towc).
Diffstat (limited to 'lib')
-rw-r--r-- | lib/lib.c | 11 |
1 files changed, 6 insertions, 5 deletions
@@ -349,17 +349,18 @@ int stridx(char *haystack, char needle) // Convert wc to utf8, returning bytes written. Does not null terminate. int wctoutf8(char *s, unsigned wc) { - int len = (wc>0x7ff)+(wc>0xffff), mask = 12+len+!!len; + int len = (wc>0x7ff)+(wc>0xffff), i; if (wc<128) { *s = wc; return 1; } else { + i = len; do { - s[1+len] = 0x80+(wc&0x3f); - wc >>= 7; - } while (len--); - *s = wc|mask; + s[1+i] = 0x80+(wc&0x3f); + wc >>= 6; + } while (i--); + *s = (((signed char) 0x80) >> (len+1)) | wc; } return 2+len; |