about summary refs log tree commit diff
path: root/toys
diff options
context:
space:
mode:
author Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
committer Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
commit d3025b14b9c13286b79f256d019a99da9425ea0e (patch)
tree 02a40c59346677cb5f6a51137f4a39d16ae6b743 /toys
parent 08481ee37ad5070ff1033d57351c3fa456d0729d (diff)
download toybox-d3025b14b9c13286b79f256d019a99da9425ea0e.tar.gz
Convert utf8towc from wchar_t to unsigned (to match wctoutf8).
The maximum unicode code point is 0x10ffff which is 21 bits.
Diffstat (limited to 'toys')
-rw-r--r-- toys/example/demo_utf8towc.c 4
-rw-r--r-- toys/pending/vi.c 5
-rw-r--r-- toys/posix/cut.c 2
-rw-r--r-- toys/posix/expand.c 2
-rw-r--r-- toys/posix/file.c 29
-rw-r--r-- toys/posix/grep.c 2
-rw-r--r-- toys/posix/wc.c 3
7 files changed, 21 insertions, 26 deletions
diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c
index 136be6ca..2c6050b1 100644
--- a/toys/example/demo_utf8towc.c
+++ b/toys/example/demo_utf8towc.c
@@ -19,8 +19,8 @@ void demo_utf8towc_main(void)
{
mbstate_t mb;
int len1, len2;
- unsigned u, h;
- wchar_t wc1, wc2;
+ unsigned u, h, wc2;
+ wchar_t wc1;
memset(&mb, 0, sizeof(mb));
for (u = 1; u<=0x10ffff; u++) {
diff --git a/toys/pending/vi.c b/toys/pending/vi.c
index da43d5d5..87c49d13 100644
--- a/toys/pending/vi.c
+++ b/toys/pending/vi.c
@@ -84,7 +84,7 @@ static const char *specials = ",.:;=-+*/(){}<>[]!@#$%^&|\\?\"\'";
//get utf8 length and width at same time
static int utf8_lnw(int *width, char *s, int bytes)
{
- wchar_t wc;
+ unsigned wc;
int length = 1;
if (*s == '\t') *width = TT.tabstop;
@@ -1312,10 +1312,9 @@ static int crunch_nstr(char **str, int width, int n, FILE *out, char *escmore,
{
int columns = 0, col, bytes;
char *start, *end;
+ unsigned wc;
for (end = start = *str; *end && n>0; columns += col, end += bytes, n -= bytes) {
- wchar_t wc;
-
if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) {
if (!escmore || wc>255 || !strchr(escmore, wc)) {
if (width-columns<col) break;
diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 61b2b409..6a295846 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -85,7 +85,7 @@ static void cut_line(char **pline, long len)
count = ss-s;
} else if (toys.optflags&FLAG_c) {
- wchar_t wc;
+ unsigned wc;
char *sss;
// Find start
diff --git a/toys/posix/expand.c b/toys/posix/expand.c
index f3cd44d0..e15d30d3 100644
--- a/toys/posix/expand.c
+++ b/toys/posix/expand.c
@@ -43,7 +43,7 @@ static void do_expand(int fd, char *name)
}
if (!len) break;
for (i=0; i<len; i++) {
- wchar_t blah;
+ unsigned blah;
int width = utf8towc(&blah, toybuf+i, len-i);
char c;
diff --git a/toys/posix/file.c b/toys/posix/file.c
index 0f8b5314..f7a41569 100644
--- a/toys/posix/file.c
+++ b/toys/posix/file.c
@@ -196,12 +196,12 @@ bad:
static void do_regular_file(int fd, char *name)
{
- char *s;
+ char *s = toybuf;
unsigned len, magic;
// zero through elf shnum, just in case
- memset(toybuf, 0, 80);
- if ((len = readall(fd, s = toybuf, sizeof(toybuf)))<0) perror_msg("%s", name);
+ memset(s, 0, 80);
+ if ((len = readall(fd, s, sizeof(toybuf)-8))<0) perror_msg("%s", name);
if (!len) xputs("empty");
// 45 bytes: https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
@@ -235,7 +235,7 @@ static void do_regular_file(int fd, char *name)
s-3, (int)peek_le(s, 2), (int)peek_le(s+2, 2));
// TODO: parsing JPEG for width/height is harder than GIF or PNG.
- else if (len>32 && !memcmp(toybuf, "\xff\xd8", 2)) xputs("JPEG image data");
+ else if (len>32 && !memcmp(s, "\xff\xd8", 2)) xputs("JPEG image data");
// https://en.wikipedia.org/wiki/Java_class_file#General_layout
else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe"))
@@ -252,9 +252,9 @@ static void do_regular_file(int fd, char *name)
else if (len>85 && strstart(&s, "07070")) {
char *cpioformat = "unknown type";
- if (toybuf[5] == '7') cpioformat = "pre-SVR4 or odc";
- else if (toybuf[5] == '1') cpioformat = "SVR4 with no CRC";
- else if (toybuf[5] == '2') cpioformat = "SVR4 with CRC";
+ if (*s == '7') cpioformat = "pre-SVR4 or odc";
+ else if (*s == '1') cpioformat = "SVR4 with no CRC";
+ else if (*s == '2') cpioformat = "SVR4 with CRC";
xprintf("ASCII cpio archive (%s)\n", cpioformat);
} else if (len>33 && ((magic=peek(&s,2))==0143561 || magic==070707)) {
if (magic == 0143561) printf("byte-swapped ");
@@ -265,16 +265,12 @@ static void do_regular_file(int fd, char *name)
(s[262]!=' ' || s[263]!=' ')?"":" (GNU)");
// zip/jar/apk archive, ODF/OOXML document, or such
else if (len>5 && strstart(&s, "PK\03\04")) {
- int ver = toybuf[4];
-
xprintf("Zip archive data");
- if (ver) xprintf(", requires at least v%d.%d to extract", ver/10, ver%10);
+ if (*s) xprintf(", requires at least v%d.%d to extract", *s/10, *s%10);
xputc('\n');
} else if (len>9 && strstart(&s, "7z\xbc\xaf\x27\x1c")) {
- int ver = toybuf[6]*10+toybuf[7];
-
xprintf("7-zip archive data");
- if (ver) xprintf(", version %d.%d", ver/10, ver%10);
+ if (*s || s[1]) xprintf(", version %d.%d", *s, s[1]);
xputc('\n');
} else if (len>4 && strstart(&s, "BZh") && isdigit(*s))
xprintf("bzip2 compressed data, block size = %c00k\n", *s);
@@ -410,13 +406,14 @@ static void do_regular_file(int fd, char *name)
// Whitespace is allowed between the #! and the interpreter
while (isspace(*s)) s++;
if (strstart(&s, "/usr/bin/env")) while (isspace(*s)) s++;
- for (what = s; (s-toybuf)<len && !isspace(*s); s++);
+ for (what = s; *s && !isspace(*s); s++);
strcpy(s, " script");
// Distinguish ASCII text, UTF-8 text, or data
} else for (i = 0; i<len; ++i) {
- if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) {
- wchar_t wc;
+ if (!(isprint(s[i]) || isspace(s[i]))) {
+ unsigned wc;
+
if ((bytes = utf8towc(&wc, s+i, len-i))>0 && wcwidth(wc)>=0) {
i += bytes-1;
if (!what) what = "UTF-8 text";
diff --git a/toys/posix/grep.c b/toys/posix/grep.c
index 52d10139..8eb3c03a 100644
--- a/toys/posix/grep.c
+++ b/toys/posix/grep.c
@@ -124,7 +124,7 @@ static void do_grep(int fd, char *name)
if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
char buf[256];
int len, i = 0;
- wchar_t wc;
+ unsigned wc;
// If the first 256 bytes don't parse as utf8, call it binary.
if (0<(len = read(fd, buf, 256))) {
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 910e4690..118e7750 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -74,6 +74,7 @@ static void do_wc(int fd, char *name)
for (;;) {
int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
+ unsigned wchar;
if (len2<0) perror_msg_raw(name);
else len += len2;
@@ -85,8 +86,6 @@ static void do_wc(int fd, char *name)
if (FLAG(m)) {
// If we've consumed next wide char
if (--clen<1) {
- wchar_t wchar;
-
// next wide size, don't count invalid, fetch more data if necessary
clen = utf8towc(&wchar, toybuf+pos, len-pos);
if (clen == -1) continue;