diff options
-rw-r--r-- | libbb/unicode.c | 36 |
1 file changed, 14 insertions, 22 deletions
diff --git a/libbb/unicode.c b/libbb/unicode.c
index 773a0744e..3519984d9 100644
--- a/libbb/unicode.c
+++ b/libbb/unicode.c
@@ -48,7 +48,7 @@ void FAST_FUNC check_unicode_in_env(void)
 
 static size_t wcrtomb_internal(char *s, wchar_t wc)
 {
-	int n;
+	int n, i;
 	uint32_t v = wc;
 
 	if (v <= 0x7f) {
@@ -59,34 +59,26 @@ static size_t wcrtomb_internal(char *s, wchar_t wc)
 	/* RFC 3629 says that Unicode ends at 10FFFF,
 	 * but we cover entire 32 bits */
 
-	n = 2;
 	/* 4000000-FFFFFFFF -> 111111tt 10tttttt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */
-	if (v >= 0x4000000) {
-		s[5] = (wc & 0x3f) | 0x80;
-		wc = (uint32_t)wc >> 6; /* ensuring that high bits are 0 */
-		n++;
-	}
 	/* 200000-3FFFFFF -> 111110tt 10zzzzzz 10zzyyyy 10yyyyxx 10xxxxxx */
-	if (v >= 0x200000) {
-		s[4] = (wc & 0x3f) | 0x80;
-		wc >>= 6;
-		n++;
-	}
 	/* 10000-1FFFFF -> 11110zzz 10zzyyyy 10yyyyxx 10xxxxxx */
-	if (v >= 0x10000) {
-		s[3] = (wc & 0x3f) | 0x80;
-		wc >>= 6;
+	/* 800-FFFF -> 1110yyyy 10yyyyxx 10xxxxxx */
+	/* 80-7FF -> 110yyyxx 10xxxxxx */
+
+	/* How many bytes do we need? */
+	n = 2;
+	/* (0x80000000+ would result in n = 7, limiting n to 6) */
+	while (v >= 0x800 && n < 6) {
+		v >>= 5;
 		n++;
 	}
-	/* 800-FFFF -> 1110yyyy 10yyyyxx 10xxxxxx */
-	if (v >= 0x800) {
-		s[2] = (wc & 0x3f) | 0x80;
+	/* Fill bytes n-1..1 */
+	i = n;
+	while (--i) {
+		s[i] = (wc & 0x3f) | 0x80;
 		wc >>= 6;
-		n++;
 	}
-	/* 80-7FF -> 110yyyxx 10xxxxxx */
-	s[1] = (wc & 0x3f) | 0x80;
-	wc >>= 6;
+	/* Fill byte 0 */
 	s[0] = wc | (uint8_t)(0x3f00 >> n);
 	return n;
 }