diff options
author | Rob Landley <rob@landley.net> | 2021-05-15 11:14:03 -0500 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2021-05-15 11:14:03 -0500 |
commit | d3025b14b9c13286b79f256d019a99da9425ea0e (patch) | |
tree | 02a40c59346677cb5f6a51137f4a39d16ae6b743 /toys/posix | |
parent | 08481ee37ad5070ff1033d57351c3fa456d0729d (diff) | |
download | toybox-d3025b14b9c13286b79f256d019a99da9425ea0e.tar.gz |
Convert utf8towc from wchar_t to unsigned (to match wctoutf8).
The maximum Unicode code point is 0x10ffff, which fits in 21 bits.
Diffstat (limited to 'toys/posix')
-rw-r--r-- | toys/posix/cut.c | 2 | ||||
-rw-r--r-- | toys/posix/expand.c | 2 | ||||
-rw-r--r-- | toys/posix/file.c | 29 | ||||
-rw-r--r-- | toys/posix/grep.c | 2 | ||||
-rw-r--r-- | toys/posix/wc.c | 3 |
5 files changed, 17 insertions, 21 deletions
diff --git a/toys/posix/cut.c b/toys/posix/cut.c index 61b2b409..6a295846 100644 --- a/toys/posix/cut.c +++ b/toys/posix/cut.c @@ -85,7 +85,7 @@ static void cut_line(char **pline, long len) count = ss-s; } else if (toys.optflags&FLAG_c) { - wchar_t wc; + unsigned wc; char *sss; // Find start diff --git a/toys/posix/expand.c b/toys/posix/expand.c index f3cd44d0..e15d30d3 100644 --- a/toys/posix/expand.c +++ b/toys/posix/expand.c @@ -43,7 +43,7 @@ static void do_expand(int fd, char *name) } if (!len) break; for (i=0; i<len; i++) { - wchar_t blah; + unsigned blah; int width = utf8towc(&blah, toybuf+i, len-i); char c; diff --git a/toys/posix/file.c b/toys/posix/file.c index 0f8b5314..f7a41569 100644 --- a/toys/posix/file.c +++ b/toys/posix/file.c @@ -196,12 +196,12 @@ bad: static void do_regular_file(int fd, char *name) { - char *s; + char *s = toybuf; unsigned len, magic; // zero through elf shnum, just in case - memset(toybuf, 0, 80); - if ((len = readall(fd, s = toybuf, sizeof(toybuf)))<0) perror_msg("%s", name); + memset(s, 0, 80); + if ((len = readall(fd, s, sizeof(toybuf)-8))<0) perror_msg("%s", name); if (!len) xputs("empty"); // 45 bytes: https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html @@ -235,7 +235,7 @@ static void do_regular_file(int fd, char *name) s-3, (int)peek_le(s, 2), (int)peek_le(s+2, 2)); // TODO: parsing JPEG for width/height is harder than GIF or PNG. 
- else if (len>32 && !memcmp(toybuf, "\xff\xd8", 2)) xputs("JPEG image data"); + else if (len>32 && !memcmp(s, "\xff\xd8", 2)) xputs("JPEG image data"); // https://en.wikipedia.org/wiki/Java_class_file#General_layout else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe")) @@ -252,9 +252,9 @@ static void do_regular_file(int fd, char *name) else if (len>85 && strstart(&s, "07070")) { char *cpioformat = "unknown type"; - if (toybuf[5] == '7') cpioformat = "pre-SVR4 or odc"; - else if (toybuf[5] == '1') cpioformat = "SVR4 with no CRC"; - else if (toybuf[5] == '2') cpioformat = "SVR4 with CRC"; + if (*s == '7') cpioformat = "pre-SVR4 or odc"; + else if (*s == '1') cpioformat = "SVR4 with no CRC"; + else if (*s == '2') cpioformat = "SVR4 with CRC"; xprintf("ASCII cpio archive (%s)\n", cpioformat); } else if (len>33 && ((magic=peek(&s,2))==0143561 || magic==070707)) { if (magic == 0143561) printf("byte-swapped "); @@ -265,16 +265,12 @@ static void do_regular_file(int fd, char *name) (s[262]!=' ' || s[263]!=' ')?"":" (GNU)"); // zip/jar/apk archive, ODF/OOXML document, or such else if (len>5 && strstart(&s, "PK\03\04")) { - int ver = toybuf[4]; - xprintf("Zip archive data"); - if (ver) xprintf(", requires at least v%d.%d to extract", ver/10, ver%10); + if (*s) xprintf(", requires at least v%d.%d to extract", *s/10, *s%10); xputc('\n'); } else if (len>9 && strstart(&s, "7z\xbc\xaf\x27\x1c")) { - int ver = toybuf[6]*10+toybuf[7]; - xprintf("7-zip archive data"); - if (ver) xprintf(", version %d.%d", ver/10, ver%10); + if (*s || s[1]) xprintf(", version %d.%d", *s, s[1]); xputc('\n'); } else if (len>4 && strstart(&s, "BZh") && isdigit(*s)) xprintf("bzip2 compressed data, block size = %c00k\n", *s); @@ -410,13 +406,14 @@ static void do_regular_file(int fd, char *name) // Whitespace is allowed between the #! 
and the interpreter while (isspace(*s)) s++; if (strstart(&s, "/usr/bin/env")) while (isspace(*s)) s++; - for (what = s; (s-toybuf)<len && !isspace(*s); s++); + for (what = s; *s && !isspace(*s); s++); strcpy(s, " script"); // Distinguish ASCII text, UTF-8 text, or data } else for (i = 0; i<len; ++i) { - if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) { - wchar_t wc; + if (!(isprint(s[i]) || isspace(s[i]))) { + unsigned wc; + if ((bytes = utf8towc(&wc, s+i, len-i))>0 && wcwidth(wc)>=0) { i += bytes-1; if (!what) what = "UTF-8 text"; diff --git a/toys/posix/grep.c b/toys/posix/grep.c index 52d10139..8eb3c03a 100644 --- a/toys/posix/grep.c +++ b/toys/posix/grep.c @@ -124,7 +124,7 @@ static void do_grep(int fd, char *name) if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) { char buf[256]; int len, i = 0; - wchar_t wc; + unsigned wc; // If the first 256 bytes don't parse as utf8, call it binary. if (0<(len = read(fd, buf, 256))) { diff --git a/toys/posix/wc.c b/toys/posix/wc.c index 910e4690..118e7750 100644 --- a/toys/posix/wc.c +++ b/toys/posix/wc.c @@ -74,6 +74,7 @@ static void do_wc(int fd, char *name) for (;;) { int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len); + unsigned wchar; if (len2<0) perror_msg_raw(name); else len += len2; @@ -85,8 +86,6 @@ static void do_wc(int fd, char *name) if (FLAG(m)) { // If we've consumed next wide char if (--clen<1) { - wchar_t wchar; - // next wide size, don't count invalid, fetch more data if necessary clen = utf8towc(&wchar, toybuf+pos, len-pos); if (clen == -1) continue; |