aboutsummaryrefslogtreecommitdiff
path: root/toys/other
diff options
context:
space:
mode:
authorRob Landley <rob@landley.net>2021-05-15 10:43:09 -0500
committerRob Landley <rob@landley.net>2021-05-15 10:43:09 -0500
commit3670ac7432ea5eb31f7432a20c596c1a990f9975 (patch)
tree95a211ccd3485be4baed32aa6223a83ac78478f6 /toys/other
parent16e2520bf8d3ac90012bbda150d39856064439c3 (diff)
downloadtoybox-3670ac7432ea5eb31f7432a20c596c1a990f9975.tar.gz
Promote unicode (merge into ascii.c)
Diffstat (limited to 'toys/other')
-rw-r--r--toys/other/ascii.c79
1 files changed, 62 insertions, 17 deletions
diff --git a/toys/other/ascii.c b/toys/other/ascii.c
index 42f73260..c9448a75 100644
--- a/toys/other/ascii.c
+++ b/toys/other/ascii.c
@@ -4,8 +4,15 @@
*
* Technically 7-bit ASCII is ANSI X3.4-1986, a standard available as
* INCITS 4-1986[R2012] on ansi.org, but they charge for it.
+ *
+ * unicode.c - convert between Unicode and UTF-8
+ *
+ * Copyright 2020 The Android Open Source Project.
+ *
+ * Loosely based on the Plan9/Inferno unicode(1).
-USE_ASCII(NEWTOY(ascii, 0, TOYFLAG_USR|TOYFLAG_BIN))
+USE_ASCII(NEWTOY(ascii, 0, TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LINEBUF))
+USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
config ASCII
bool "ascii"
@@ -14,27 +21,65 @@ config ASCII
usage: ascii
Display ascii character set.
+
+config UNICODE
+ bool "unicode"
+ default y
+ help
+ usage: unicode CODE[-END]...
+
+ Convert between Unicode code points and UTF-8, in both directions.
+ CODE can be one or more characters (show U+XXXX), hex numbers
+ (show character), or dash separated range.
*/
+#define FOR_unicode
#include "toys.h"
+static char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2"
+ "DC3DC4NAKSYNETBCANEM SUBESCFS GS RS US ";
+
+static void codepoint(unsigned wc)
+{
+ char *s = toybuf + sprintf(toybuf, "U+%04X : ", wc), *ss;
+ unsigned n, i;
+
+ if (wc>31 && wc!=127) {
+ s += n = wctoutf8(ss = s, wc);
+ if (n>1) for (i = 0; i<n; i++) s += sprintf(s, " : %#02x"+2*!!i, *ss++);
+ } else s = memcpy(s, (wc==127) ? "DEL" : low+wc*3, 3)+3;
+ *s++ = '\n';
+ writeall(1, toybuf, s-toybuf);
+}
+
+void unicode_main(void)
+{
+ int from, to, n;
+ char next, **args, *s;
+ wchar_t wc;
+
+ // Loop through args, handling range, hex code, or character(s)
+ for (args = toys.optargs; *args; args++) {
+ if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2)
+ while (from <= to) codepoint(from++);
+ else if (sscanf(*args, "%x%c", &from, &next) == 1) codepoint(from);
+ else for (s = *args; (n = utf8towc(&wc, s, 4)) > 0; s += n) codepoint(wc);
+ }
+}
+
void ascii_main(void)
{
- char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
- "NAKSYNETBCANEM SUBESCFS GS RS US ";
- int x, y;
-
- for (x = 0; x<8; x++) printf("Dec Hex%*c", 2+2*(x<2)+(x>4), ' ');
- xputc('\n');
- for (y=0; y<=15; y++) {
- for (x=0; x<8; x++) {
- int i = x*16+y;
-
- if (i>95 && i<100) putchar(' ');
- printf("% 3d %02X ", i, i);
- if (i<32 || i==127) printf("%.3s ", (i==127) ? "DEL" : low+3*i);
- else printf("%c ", i);
- }
- xputc('\n');
+ char *s = toybuf;
+ int i, x, y;
+
+ for (y = -1; y<16; y++) for (x = 0; x<8; x++) {
+ if (y>=0) {
+ i = (x<<4)+y;
+ s += sprintf(s, "% *d %02X ", 3+(x>5), i, i);
+ if (i<32 || i==127) s += sprintf(s, "%.3s", (i<32) ? low+3*i : "DEL");
+ else *s++ = i;
+ } else s += sprintf(s, "Dec Hex%*c", 1+2*(x<2)+(x>4), ' ');
+ *s++ = (x>6) ? '\n' : ' ';
}
+ writeall(1, toybuf, s-toybuf);
}