Promote unicode (merge into ascii.c)

author: Rob Landley <rob@landley.net> 2021-05-15 10:43:09 -0500
committer: Rob Landley <rob@landley.net> 2021-05-15 10:43:09 -0500
commit: 3670ac7432ea5eb31f7432a20c596c1a990f9975 (patch)
tree: 95a211ccd3485be4baed32aa6223a83ac78478f6
parent: 16e2520bf8d3ac90012bbda150d39856064439c3 (diff)
download: toybox-3670ac7432ea5eb31f7432a20c596c1a990f9975.tar.gz
2 files changed, 62 insertions, 82 deletions
diff --git a/toys/other/ascii.c b/toys/other/ascii.c
index 42f73260..c9448a75 100644
--- a/toys/other/ascii.c
+++ b/toys/other/ascii.c
@@ -4,8 +4,15 @@
  *
  * Technically 7-bit ASCII is ANSI X3.4-1986, a standard available as
  * INCITS 4-1986[R2012] on ansi.org, but they charge for it.
+ *
+ * unicode.c - convert between Unicode and UTF-8
+ *
+ * Copyright 2020 The Android Open Source Project.
+ *
+ * Loosely based on the Plan9/Inferno unicode(1).
 
-USE_ASCII(NEWTOY(ascii, 0, TOYFLAG_USR|TOYFLAG_BIN))
+USE_ASCII(NEWTOY(ascii, 0, TOYFLAG_USR|TOYFLAG_BIN|TOYFLAG_LINEBUF))
+USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
 
 config ASCII
   bool "ascii"
@@ -14,27 +21,65 @@ config ASCII
     usage: ascii
 
     Display ascii character set.
+
+config UNICODE
+  bool "unicode"
+  default y
+  help
+    usage: unicode CODE[-END]...
+
+    Convert between Unicode code points and UTF-8, in both directions.
+    CODE can be one or more characters (show U+XXXX), hex numbers
+    (show character), or dash separated range.
 */
 
+#define FOR_unicode
 #include "toys.h"
 
+static char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2"
+                 "DC3DC4NAKSYNETBCANEM SUBESCFS GS RS US ";
+
+static void codepoint(unsigned wc)
+{
+  char *s = toybuf + sprintf(toybuf, "U+%04X : ", wc), *ss;
+  unsigned n, i;
+
+  if (wc>31 && wc!=127) {
+    s += n = wctoutf8(ss = s, wc);
+    if (n>1) for (i = 0; i<n; i++) s += sprintf(s, " : %#02x"+2*!!i, *ss++);
+  } else s = memcpy(s, (wc==127) ? "DEL" : low+wc*3, 3)+3;
+  *s++ = '\n';
+  writeall(1, toybuf, s-toybuf);
+}
+
+void unicode_main(void)
+{
+  int from, to, n;
+  char next, **args, *s;
+  wchar_t wc;
+
+  // Loop through args, handling range, hex code, or character(s)
+  for (args = toys.optargs; *args; args++) {
+    if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2)
+      while (from <= to) codepoint(from++);
+    else if (sscanf(*args, "%x%c", &from, &next) == 1) codepoint(from);
+    else for (s = *args; (n = utf8towc(&wc, s, 4)) > 0; s += n) codepoint(wc);
+  }
+}
+
 void ascii_main(void)
 {
-  char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
-            "NAKSYNETBCANEM SUBESCFS GS RS US ";
-  int x, y;
-
-  for (x = 0; x<8; x++) printf("Dec Hex%*c", 2+2*(x<2)+(x>4), ' ');
-  xputc('\n');
-  for (y=0; y<=15; y++) {
-    for (x=0; x<8; x++) {
-      int i = x*16+y;
-
-      if (i>95 && i<100) putchar(' ');
-      printf("% 3d %02X ", i, i);
-      if (i<32 || i==127) printf("%.3s ", (i==127) ? "DEL" : low+3*i);
-      else printf("%c ", i);
-    }
-    xputc('\n');
+  char *s = toybuf;
+  int i, x, y;
+
+  for (y = -1; y<16; y++) for (x = 0; x<8; x++) {
+    if (y>=0) {
+      i = (x<<4)+y;
+      s += sprintf(s, "% *d %02X ", 3+(x>5), i, i);
+      if (i<32 || i==127) s += sprintf(s, "%.3s", (i<32) ? low+3*i : "DEL");
+      else *s++ = i;
+    } else s += sprintf(s, "Dec Hex%*c", 1+2*(x<2)+(x>4), ' ');
+    *s++ = (x>6) ? '\n' : ' ';
   }
+  writeall(1, toybuf, s-toybuf);
 }
diff --git a/toys/pending/unicode.c b/toys/pending/unicode.c
deleted file mode 100644
index 0a9eb24a..00000000
--- a/toys/pending/unicode.c
+++ /dev/null
@@ -1,65 +0,0 @@
-/* unicode.c - convert between Unicode and UTF-8
- *
- * Copyright 2020 The Android Open Source Project.
- *
- * Loosely based on the Plan9/Inferno unicode(1).
-
-USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))
-
-config UNICODE
-  bool "unicode"
-  default n
-  help
-    usage: unicode [[min]-max]
-
-    Convert between Unicode code points and UTF-8, in both directions.
-*/
-
-#define FOR_unicode
-#include "toys.h"
-
-static void codepoint(unsigned wc) {
-  char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
-            "NAKSYNETBCANEM SUBESCFS GS RS US ";
-  unsigned n, i;
-
-  printf("U+%04X : ", wc);
-  if (wc < ' ') printf("%.3s", low+(wc*3));
-  else if (wc == 0x7f) printf("DEL");
-  else {
-    toybuf[n = wctoutf8(toybuf, wc)] = 0;
-    printf("%s%s", toybuf, n>1 ? " :":"");
-    if (n>1) for (i = 0; i < n; i++) printf(" %#02x", toybuf[i]);
-  }
-  xputc('\n');
-}
-
-void unicode_main(void)
-{
-  unsigned from, to;
-  char next, **args;
-
-  for (args = toys.optargs; *args; args++) {
-    // unicode 660-666 => table of `U+0600 : ٠ : 0xd9 0xa0` etc.
-    if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) {
-      while (from <= to) codepoint(from++);
-
-    // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`.
-    } else if (sscanf(*args, "%x%c", &from, &next) == 1) {
-      codepoint(from);
-
-    // unicode hello => table showing every character in the string.
-    } else {
-      char *s = *args;
-      size_t l = strlen(s);
-      wchar_t wc;
-      int n;
-
-      while ((n = utf8towc(&wc, s, l)) > 0) {
-        codepoint(wc);
-        s += n;
-        l -= n;
-      }
-    }
-  }
-}
author	Rob Landley <rob@landley.net>	2021-05-15 10:43:09 -0500
committer	Rob Landley <rob@landley.net>	2021-05-15 10:43:09 -0500
commit	3670ac7432ea5eb31f7432a20c596c1a990f9975 (patch)
tree	95a211ccd3485be4baed32aa6223a83ac78478f6
parent	16e2520bf8d3ac90012bbda150d39856064439c3 (diff)
download	toybox-3670ac7432ea5eb31f7432a20c596c1a990f9975.tar.gz