aboutsummaryrefslogtreecommitdiff
path: root/toys/pending/unicode.c
blob: 0a9eb24a366cf810e7a1798b573e397fec10be81 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
/* unicode.c - convert between Unicode and UTF-8
 *
 * Copyright 2020 The Android Open Source Project.
 *
 * Loosely based on the Plan9/Inferno unicode(1).

USE_UNICODE(NEWTOY(unicode, "<1", TOYFLAG_USR|TOYFLAG_BIN))

config UNICODE
  bool "unicode"
  default n
  help
    usage: unicode [[min]-max]

    Convert between Unicode code points and UTF-8, in both directions.
*/

#define FOR_unicode
#include "toys.h"

// Print one line describing code point wc: "U+XXXX : " followed by either a
// control character abbreviation (wc below space, or 0x7f), or the character
// encoded as UTF-8 plus, when that takes more than one byte, each byte in hex.
static void codepoint(unsigned wc) {
  // Packed table of 3-character names for U+0000..U+001F, indexed by wc*3.
  // Two-letter names carry a trailing space as padding.
  char *low="NULSOHSTXETXEOTENQACKBELBS HT LF VT FF CR SO SI DLEDC1DC2DC3DC4"
            "NAKSYNETBCANEM SUBESCFS GS RS US ";
  unsigned n, i;

  printf("U+%04X : ", wc);
  if (wc < ' ') printf("%.3s", low+(wc*3));
  else if (wc == 0x7f) printf("DEL");
  else {
    // Terminate the encoded bytes at the length wctoutf8() reported so they
    // can be printed with %s.
    toybuf[n = wctoutf8(toybuf, wc)] = 0;
    printf("%s%s", toybuf, n>1 ? " :":"");
    // Cast each byte to unsigned char: where plain char is signed, bytes
    // >= 0x80 would otherwise sign-extend and print as 0xffffffXX.
    if (n>1) for (i = 0; i < n; i++)
      printf(" %#02x", (unsigned char)toybuf[i]);
  }
  xputc('\n');
}

// Entry point: each argument is tried as a hex range "min-max", then as a
// single hex code point, and otherwise treated as a UTF-8 string. One
// codepoint() line is printed per code point named.
void unicode_main(void)
{
  unsigned from, to;
  char next, **args;

  for (args = toys.optargs; *args; args++) {
    // unicode 660-666 => table of `U+0660 : ٠ : 0xd9 0xa0` etc.
    // The trailing %c only converts when something follows the numbers, so a
    // result of exactly 2 (or exactly 1 below) means nothing was left over.
    if (sscanf(*args, "%x-%x%c", &from, &to, &next) == 2) {
      // Compare before incrementing: if to is the largest unsigned value, a
      // plain `from <= to` loop would wrap from back to 0 and never finish.
      if (from <= to) do codepoint(from); while (from++ != to);

    // unicode 666 => just `U+0666 : ٦ : 0xd9 0xa6`.
    } else if (sscanf(*args, "%x%c", &from, &next) == 1) {
      codepoint(from);

    // unicode hello => table showing every character in the string.
    } else {
      char *s = *args;
      size_t l = strlen(s);
      wchar_t wc;
      int n;

      // Decode one character at a time; stops at the end of the string or at
      // the first sequence for which utf8towc() returns a non-positive value.
      while ((n = utf8towc(&wc, s, l)) > 0) {
        codepoint(wc);
        s += n;
        l -= n;
      }
    }
  }
}