/* unicode.c - convert between Unicode and UTF-8 * * Copyright 2020 The Android Open Source Project. * * Loosely based on the Plan9/Inferno unicode(1). USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN)) config UNICODE bool "unicode" default n help usage: unicode [[min]-max] Convert between Unicode code points and UTF-8, in both directions. */ #define FOR_unicode #include "toys.h" static void codepoint(unsigned wc) { char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4" "NAKSYNETBCANEM SUBESCFS GS RS US "; unsigned n, i; printf("U+%04X : ", wc); if (wc < ' ') printf("%.3s", low+(wc*3)); else if (wc == 0x7f) printf("DEL"); else { toybuf[n = wctoutf8(toybuf, wc)] = 0; printf("%s%s", toybuf, n>1 ? " :":""); if (n>1) for (i = 0; i < n; i++) printf(" %#02x", toybuf[i]); } xputc('\n'); } void unicode_main(void) { unsigned from, to; char next, **args; for (args = toys.optargs; *args; args++) { // unicode 660-666 => table of `U+0600 : ٠ : 0xd9 0xa0` etc. if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) { while (from <= to) codepoint(from++); // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`. } else if (sscanf(*args, "%x%c", &from, &next) == 1) { codepoint(from); // unicode hello => table showing every character in the string. } else { char *s = *args; size_t l = strlen(s); wchar_t wc; int n; while ((n = utf8towc(&wc, s, l)) > 0) { codepoint(wc); s += n; l -= n; } } } }