From d3025b14b9c13286b79f256d019a99da9425ea0e Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Sat, 15 May 2021 11:14:03 -0500 Subject: Convert utf8towc from wchar_t to unsigned (to match wctoutf8). The maximum unicode code point is 0x10ffff which is 21 bits. --- toys/example/demo_utf8towc.c | 4 ++-- toys/pending/vi.c | 5 ++--- toys/posix/cut.c | 2 +- toys/posix/expand.c | 2 +- toys/posix/file.c | 29 +++++++++++++---------------- toys/posix/grep.c | 2 +- toys/posix/wc.c | 3 +-- 7 files changed, 21 insertions(+), 26 deletions(-) (limited to 'toys') diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c index 136be6ca..2c6050b1 100644 --- a/toys/example/demo_utf8towc.c +++ b/toys/example/demo_utf8towc.c @@ -19,8 +19,8 @@ void demo_utf8towc_main(void) { mbstate_t mb; int len1, len2; - unsigned u, h; - wchar_t wc1, wc2; + unsigned u, h, wc2; + wchar_t wc1; memset(&mb, 0, sizeof(mb)); for (u = 1; u<=0x10ffff; u++) { diff --git a/toys/pending/vi.c b/toys/pending/vi.c index da43d5d5..87c49d13 100644 --- a/toys/pending/vi.c +++ b/toys/pending/vi.c @@ -84,7 +84,7 @@ static const char *specials = ",.:;=-+*/(){}<>[]!@#$%^&|\\?\"\'"; //get utf8 length and width at same time static int utf8_lnw(int *width, char *s, int bytes) { - wchar_t wc; + unsigned wc; int length = 1; if (*s == '\t') *width = TT.tabstop; @@ -1312,10 +1312,9 @@ static int crunch_nstr(char **str, int width, int n, FILE *out, char *escmore, { int columns = 0, col, bytes; char *start, *end; + unsigned wc; for (end = start = *str; *end && n>0; columns += col, end += bytes, n -= bytes) { - wchar_t wc; - if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) { if (!escmore || wc>255 || !strchr(escmore, wc)) { if (width-columns32 && !memcmp(toybuf, "\xff\xd8", 2)) xputs("JPEG image data"); + else if (len>32 && !memcmp(s, "\xff\xd8", 2)) xputs("JPEG image data"); // https://en.wikipedia.org/wiki/Java_class_file#General_layout else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe")) @@ -252,9 +252,9 @@ static void do_regular_file(int fd, char *name) else if (len>85 && strstart(&s, "07070")) { char *cpioformat = "unknown type"; - if (toybuf[5] == '7') cpioformat = "pre-SVR4 or odc"; - else if (toybuf[5] == '1') cpioformat = "SVR4 with no CRC"; - else if (toybuf[5] == '2') cpioformat = "SVR4 with CRC"; + if (*s == '7') cpioformat = "pre-SVR4 or odc"; + else if (*s == '1') cpioformat = "SVR4 with no CRC"; + else if (*s == '2') cpioformat = "SVR4 with CRC"; xprintf("ASCII cpio archive (%s)\n", cpioformat); } else if (len>33 && ((magic=peek(&s,2))==0143561 || magic==070707)) { if (magic == 0143561) printf("byte-swapped "); @@ -265,16 +265,12 @@ static void do_regular_file(int fd, char *name) (s[262]!=' ' || s[263]!=' ')?"":" (GNU)"); // zip/jar/apk archive, ODF/OOXML document, or such else if (len>5 && strstart(&s, "PK\03\04")) { - int ver = toybuf[4]; - xprintf("Zip archive data"); - if (ver) xprintf(", requires at least v%d.%d to extract", ver/10, ver%10); + if (*s) xprintf(", requires at least v%d.%d to extract", *s/10, *s%10); xputc('\n'); } else if (len>9 && strstart(&s, "7z\xbc\xaf\x27\x1c")) { - int ver = toybuf[6]*10+toybuf[7]; - xprintf("7-zip archive data"); - if (ver) xprintf(", version %d.%d", ver/10, ver%10); + if (*s || s[1]) xprintf(", version %d.%d", *s, s[1]); xputc('\n'); } else if (len>4 && strstart(&s, "BZh") && isdigit(*s)) xprintf("bzip2 compressed data, block size = %c00k\n", *s); @@ -410,13 +406,14 @@ static void do_regular_file(int fd, char *name) // Whitespace is allowed between the #! and the interpreter while (isspace(*s)) s++; if (strstart(&s, "/usr/bin/env")) while (isspace(*s)) s++; - for (what = s; (s-toybuf)0 && wcwidth(wc)>=0) { i += bytes-1; if (!what) what = "UTF-8 text"; diff --git a/toys/posix/grep.c b/toys/posix/grep.c index 52d10139..8eb3c03a 100644 --- a/toys/posix/grep.c +++ b/toys/posix/grep.c @@ -124,7 +124,7 @@ static void do_grep(int fd, char *name) if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) { char buf[256]; int len, i = 0; - wchar_t wc; + unsigned wc; // If the first 256 bytes don't parse as utf8, call it binary. if (0<(len = read(fd, buf, 256))) { diff --git a/toys/posix/wc.c b/toys/posix/wc.c index 910e4690..118e7750 100644 --- a/toys/posix/wc.c +++ b/toys/posix/wc.c @@ -74,6 +74,7 @@ static void do_wc(int fd, char *name) for (;;) { int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len); + unsigned wchar; if (len2<0) perror_msg_raw(name); else len += len2; @@ -85,8 +86,6 @@ static void do_wc(int fd, char *name) if (FLAG(m)) { // If we've consumed next wide char if (--clen<1) { - wchar_t wchar; - // next wide size, don't count invalid, fetch more data if necessary clen = utf8towc(&wchar, toybuf+pos, len-pos); if (clen == -1) continue; -- cgit v1.2.3