aboutsummaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
author: Elliott Hughes <enh@google.com> 2020-12-05 17:41:39 -0800
committer: Rob Landley <rob@landley.net> 2020-12-06 02:24:12 -0600
commit: ed3d5eb0eaf74e6686bc2576b2c4d5a5343dfd57 (patch)
tree: e0116710fb056cee84f021d8b2c230806679b65b /lib
parent: 49c02dbe435681015a88c636749d144044fc5e4a (diff)
download: toybox-ed3d5eb0eaf74e6686bc2576b2c4d5a5343dfd57.tar.gz
unicode: new toy.
Based loosely on the Plan 9/Inferno utility, this toy is a convenient way to go back and forth between code points and UTF-8 sequences. This patch also fixes a couple of bugs in wctoutf8 (the tests for this toy effectively serve as unit tests for wctoutf8/utf8towc).
Diffstat (limited to 'lib')
-rw-r--r-- lib/lib.c 11
1 files changed, 6 insertions, 5 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 7786fcc1..3129e3e3 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -349,17 +349,18 @@ int stridx(char *haystack, char needle)
// Convert wc to utf8, returning bytes written. Does not null terminate.
int wctoutf8(char *s, unsigned wc)
{
- int len = (wc>0x7ff)+(wc>0xffff), mask = 12+len+!!len;
+ int len = (wc>0x7ff)+(wc>0xffff), i;
if (wc<128) {
*s = wc;
return 1;
} else {
+ i = len;
do {
- s[1+len] = 0x80+(wc&0x3f);
- wc >>= 7;
- } while (len--);
- *s = wc|mask;
+ s[1+i] = 0x80+(wc&0x3f);
+ wc >>= 6;
+ } while (i--);
+ *s = (((signed char) 0x80) >> (len+1)) | wc;
}
return 2+len;