Convert utf8towc from wchar_t to unsigned (to match wctoutf8).

The maximum Unicode code point is 0x10ffff, which fits in 21 bits.
author: Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
committer: Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
commit: d3025b14b9c13286b79f256d019a99da9425ea0e (patch)
tree: 02a40c59346677cb5f6a51137f4a39d16ae6b743
parent: 08481ee37ad5070ff1033d57351c3fa456d0729d (diff)
download: toybox-d3025b14b9c13286b79f256d019a99da9425ea0e.tar.gz
11 files changed, 27 insertions, 33 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 87bda4f6..9f9b8136 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -370,7 +370,7 @@ int wctoutf8(char *s, unsigned wc)
 
 // Convert utf8 sequence to a unicode wide character
 // returns bytes consumed, or -1 if err, or -2 if need more data.
-int utf8towc(wchar_t *wc, char *str, unsigned len)
+int utf8towc(unsigned *wc, char *str, unsigned len)
 {
   unsigned result, mask, first;
   char *s, c;
@@ -403,7 +403,7 @@ char *strlower(char *s)
 {
   char *try, *new;
   int len, mlen = (strlen(s)|7)+9;
-  wchar_t c;
+  unsigned c;
 
   try = new = xmalloc(mlen);
 
@@ -739,7 +739,7 @@ void loopfiles(char **argv, void (*function)(int fd, char *name))
   loopfiles_rw(argv, O_RDONLY|O_CLOEXEC|WARN_ONLY, 0, function);
 }
 
-// glue to call dl_lines() from loopfiles
+// glue to call do_lines() from loopfiles
 static void (*do_lines_bridge)(char **pline, long len);
 static void loopfile_lines_bridge(int fd, char *name)
 {
diff --git a/lib/lib.h b/lib/lib.h
index f9c04281..cf1920f9 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -231,7 +231,7 @@ long long atolx(char *c);
 long long atolx_range(char *numstr, long long low, long long high);
 int stridx(char *haystack, char needle);
 int wctoutf8(char *s, unsigned wc);
-int utf8towc(wchar_t *wc, char *str, unsigned len);
+int utf8towc(unsigned *wc, char *str, unsigned len);
 char *strlower(char *s);
 char *strafter(char *haystack, char *needle);
 char *chomp(char *s);
diff --git a/lib/linestack.c b/lib/linestack.c
index 0fc83e6b..e6ae1b57 100644
--- a/lib/linestack.c
+++ b/lib/linestack.c
@@ -93,10 +93,9 @@ int crunch_str(char **str, int width, FILE *out, char *escmore,
 {
   int columns = 0, col, bytes;
   char *start, *end;
+  unsigned wc;
 
   for (end = start = *str; *end; columns += col, end += bytes) {
-    wchar_t wc;
-
     if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) {
       if (!escmore || wc>255 || !strchr(escmore, wc)) {
         if (width-columns<col) break;
diff --git a/lib/llist.c b/lib/llist.c
index 45fe014d..e82cb954 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -82,6 +82,7 @@ void *dlist_lpop(void *list)
   return v;
 }
 
+// Append to list in-order (*list unchanged unless empty, ->prev is new node)
 void dlist_add_nomalloc(struct double_list **list, struct double_list *new)
 {
   if (*list) {
@@ -92,7 +93,6 @@ void dlist_add_nomalloc(struct double_list **list, struct double_list *new)
   } else *list = new->next = new->prev = new;
 }
 
-
 // Add an entry to the end of a doubly linked list
 struct double_list *dlist_add(struct double_list **list, char *data)
 {
diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c
index 136be6ca..2c6050b1 100644
--- a/toys/example/demo_utf8towc.c
+++ b/toys/example/demo_utf8towc.c
@@ -19,8 +19,8 @@ void demo_utf8towc_main(void)
 {
   mbstate_t mb;
   int len1, len2;
-  unsigned u, h;
-  wchar_t wc1, wc2;
+  unsigned u, h, wc2;
+  wchar_t wc1;
 
   memset(&mb, 0, sizeof(mb));
   for (u = 1; u<=0x10ffff; u++) {
diff --git a/toys/pending/vi.c b/toys/pending/vi.c
index da43d5d5..87c49d13 100644
--- a/toys/pending/vi.c
+++ b/toys/pending/vi.c
@@ -84,7 +84,7 @@ static const char *specials = ",.:;=-+*/(){}<>[]!@#$%^&|\\?\"\'";
 //get utf8 length and width at same time
 static int utf8_lnw(int *width, char *s, int bytes)
 {
-  wchar_t wc;
+  unsigned wc;
   int length = 1;
 
   if (*s == '\t') *width = TT.tabstop;
@@ -1312,10 +1312,9 @@ static int crunch_nstr(char **str, int width, int n, FILE *out, char *escmore,
 {
   int columns = 0, col, bytes;
   char *start, *end;
+  unsigned wc;
 
   for (end = start = *str; *end && n>0; columns += col, end += bytes, n -= bytes) {
-    wchar_t wc;
-
     if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) {
       if (!escmore || wc>255 || !strchr(escmore, wc)) {
         if (width-columns<col) break;
diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 61b2b409..6a295846 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -85,7 +85,7 @@ static void cut_line(char **pline, long len)
       count = ss-s;
 
     } else if (toys.optflags&FLAG_c) {
-      wchar_t wc;
+      unsigned wc;
       char *sss;
 
       // Find start
diff --git a/toys/posix/expand.c b/toys/posix/expand.c
index f3cd44d0..e15d30d3 100644
--- a/toys/posix/expand.c
+++ b/toys/posix/expand.c
@@ -43,7 +43,7 @@ static void do_expand(int fd, char *name)
     }
     if (!len) break;
     for (i=0; i<len; i++) {
-      wchar_t blah;
+      unsigned blah;
       int width = utf8towc(&blah, toybuf+i, len-i);
       char c;
 
diff --git a/toys/posix/file.c b/toys/posix/file.c
index 0f8b5314..f7a41569 100644
--- a/toys/posix/file.c
+++ b/toys/posix/file.c
@@ -196,12 +196,12 @@ bad:
 
 static void do_regular_file(int fd, char *name)
 {
-  char *s;
+  char *s = toybuf;
   unsigned len, magic;
 
   // zero through elf shnum, just in case
-  memset(toybuf, 0, 80);
-  if ((len = readall(fd, s = toybuf, sizeof(toybuf)))<0) perror_msg("%s", name);
+  memset(s, 0, 80);
+  if ((len = readall(fd, s, sizeof(toybuf)-8))<0) perror_msg("%s", name);
 
   if (!len) xputs("empty");
   // 45 bytes: https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
@@ -235,7 +235,7 @@ static void do_regular_file(int fd, char *name)
       s-3, (int)peek_le(s, 2), (int)peek_le(s+2, 2));
 
   // TODO: parsing JPEG for width/height is harder than GIF or PNG.
-  else if (len>32 && !memcmp(toybuf, "\xff\xd8", 2)) xputs("JPEG image data");
+  else if (len>32 && !memcmp(s, "\xff\xd8", 2)) xputs("JPEG image data");
 
   // https://en.wikipedia.org/wiki/Java_class_file#General_layout
   else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe"))
@@ -252,9 +252,9 @@ static void do_regular_file(int fd, char *name)
   else if (len>85 && strstart(&s, "07070")) {
     char *cpioformat = "unknown type";
 
-    if (toybuf[5] == '7') cpioformat = "pre-SVR4 or odc";
-    else if (toybuf[5] == '1') cpioformat = "SVR4 with no CRC";
-    else if (toybuf[5] == '2') cpioformat = "SVR4 with CRC";
+    if (*s == '7') cpioformat = "pre-SVR4 or odc";
+    else if (*s == '1') cpioformat = "SVR4 with no CRC";
+    else if (*s == '2') cpioformat = "SVR4 with CRC";
     xprintf("ASCII cpio archive (%s)\n", cpioformat);
   } else if (len>33 && ((magic=peek(&s,2))==0143561 || magic==070707)) {
     if (magic == 0143561) printf("byte-swapped ");
@@ -265,16 +265,12 @@ static void do_regular_file(int fd, char *name)
       (s[262]!=' ' || s[263]!=' ')?"":" (GNU)");
   // zip/jar/apk archive, ODF/OOXML document, or such
   else if (len>5 && strstart(&s, "PK\03\04")) {
-    int ver = toybuf[4];
-
     xprintf("Zip archive data");
-    if (ver) xprintf(", requires at least v%d.%d to extract", ver/10, ver%10);
+    if (*s) xprintf(", requires at least v%d.%d to extract", *s/10, *s%10);
     xputc('\n');
   } else if (len>9 && strstart(&s, "7z\xbc\xaf\x27\x1c")) {
-    int ver = toybuf[6]*10+toybuf[7];
-
     xprintf("7-zip archive data");
-    if (ver) xprintf(", version %d.%d", ver/10, ver%10);
+    if (*s || s[1]) xprintf(", version %d.%d", *s, s[1]);
     xputc('\n');
   } else if (len>4 && strstart(&s, "BZh") && isdigit(*s))
     xprintf("bzip2 compressed data, block size = %c00k\n", *s);
@@ -410,13 +406,14 @@ static void do_regular_file(int fd, char *name)
       // Whitespace is allowed between the #! and the interpreter
       while (isspace(*s)) s++;
       if (strstart(&s, "/usr/bin/env")) while (isspace(*s)) s++;
-      for (what = s; (s-toybuf)<len && !isspace(*s); s++);
+      for (what = s; *s && !isspace(*s); s++);
       strcpy(s, " script");
 
     // Distinguish ASCII text, UTF-8 text, or data
     } else for (i = 0; i<len; ++i) {
-      if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) {
-        wchar_t wc;
+      if (!(isprint(s[i]) || isspace(s[i]))) {
+        unsigned wc;
+
         if ((bytes = utf8towc(&wc, s+i, len-i))>0 && wcwidth(wc)>=0) {
           i += bytes-1;
           if (!what) what = "UTF-8 text";
diff --git a/toys/posix/grep.c b/toys/posix/grep.c
index 52d10139..8eb3c03a 100644
--- a/toys/posix/grep.c
+++ b/toys/posix/grep.c
@@ -124,7 +124,7 @@ static void do_grep(int fd, char *name)
   if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
     char buf[256];
     int len, i = 0;
-    wchar_t wc;
+    unsigned wc;
 
     // If the first 256 bytes don't parse as utf8, call it binary.
     if (0<(len = read(fd, buf, 256))) {
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 910e4690..118e7750 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -74,6 +74,7 @@ static void do_wc(int fd, char *name)
 
   for (;;) {
     int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
+    unsigned wchar;
 
     if (len2<0) perror_msg_raw(name);
     else len += len2;
@@ -85,8 +86,6 @@ static void do_wc(int fd, char *name)
       if (FLAG(m)) {
         // If we've consumed next wide char
         if (--clen<1) {
-          wchar_t wchar;
-
           // next wide size, don't count invalid, fetch more data if necessary
           clen = utf8towc(&wchar, toybuf+pos, len-pos);
           if (clen == -1) continue;
author	Rob Landley <rob@landley.net>	2021-05-15 11:14:03 -0500
committer	Rob Landley <rob@landley.net>	2021-05-15 11:14:03 -0500
commit	d3025b14b9c13286b79f256d019a99da9425ea0e (patch)
tree	02a40c59346677cb5f6a51137f4a39d16ae6b743
parent	08481ee37ad5070ff1033d57351c3fa456d0729d (diff)
download	toybox-d3025b14b9c13286b79f256d019a99da9425ea0e.tar.gz