about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
committer: Rob Landley <rob@landley.net> 2021-05-15 11:14:03 -0500
commit: d3025b14b9c13286b79f256d019a99da9425ea0e (patch)
tree: 02a40c59346677cb5f6a51137f4a39d16ae6b743
parent: 08481ee37ad5070ff1033d57351c3fa456d0729d (diff)
download: toybox-d3025b14b9c13286b79f256d019a99da9425ea0e.tar.gz
Convert utf8towc from wchar_t to unsigned (to match wctoutf8).
The maximum Unicode code point is 0x10ffff, which is 21 bits.
-rw-r--r-- lib/lib.c 6
-rw-r--r-- lib/lib.h 2
-rw-r--r-- lib/linestack.c 3
-rw-r--r-- lib/llist.c 2
-rw-r--r-- toys/example/demo_utf8towc.c 4
-rw-r--r-- toys/pending/vi.c 5
-rw-r--r-- toys/posix/cut.c 2
-rw-r--r-- toys/posix/expand.c 2
-rw-r--r-- toys/posix/file.c 29
-rw-r--r-- toys/posix/grep.c 2
-rw-r--r-- toys/posix/wc.c 3
11 files changed, 27 insertions, 33 deletions
diff --git a/lib/lib.c b/lib/lib.c
index 87bda4f6..9f9b8136 100644
--- a/lib/lib.c
+++ b/lib/lib.c
@@ -370,7 +370,7 @@ int wctoutf8(char *s, unsigned wc)
// Convert utf8 sequence to a unicode wide character
// returns bytes consumed, or -1 if err, or -2 if need more data.
-int utf8towc(wchar_t *wc, char *str, unsigned len)
+int utf8towc(unsigned *wc, char *str, unsigned len)
{
unsigned result, mask, first;
char *s, c;
@@ -403,7 +403,7 @@ char *strlower(char *s)
{
char *try, *new;
int len, mlen = (strlen(s)|7)+9;
- wchar_t c;
+ unsigned c;
try = new = xmalloc(mlen);
@@ -739,7 +739,7 @@ void loopfiles(char **argv, void (*function)(int fd, char *name))
loopfiles_rw(argv, O_RDONLY|O_CLOEXEC|WARN_ONLY, 0, function);
}
-// glue to call dl_lines() from loopfiles
+// glue to call do_lines() from loopfiles
static void (*do_lines_bridge)(char **pline, long len);
static void loopfile_lines_bridge(int fd, char *name)
{
diff --git a/lib/lib.h b/lib/lib.h
index f9c04281..cf1920f9 100644
--- a/lib/lib.h
+++ b/lib/lib.h
@@ -231,7 +231,7 @@ long long atolx(char *c);
long long atolx_range(char *numstr, long long low, long long high);
int stridx(char *haystack, char needle);
int wctoutf8(char *s, unsigned wc);
-int utf8towc(wchar_t *wc, char *str, unsigned len);
+int utf8towc(unsigned *wc, char *str, unsigned len);
char *strlower(char *s);
char *strafter(char *haystack, char *needle);
char *chomp(char *s);
diff --git a/lib/linestack.c b/lib/linestack.c
index 0fc83e6b..e6ae1b57 100644
--- a/lib/linestack.c
+++ b/lib/linestack.c
@@ -93,10 +93,9 @@ int crunch_str(char **str, int width, FILE *out, char *escmore,
{
int columns = 0, col, bytes;
char *start, *end;
+ unsigned wc;
for (end = start = *str; *end; columns += col, end += bytes) {
- wchar_t wc;
-
if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) {
if (!escmore || wc>255 || !strchr(escmore, wc)) {
if (width-columns<col) break;
diff --git a/lib/llist.c b/lib/llist.c
index 45fe014d..e82cb954 100644
--- a/lib/llist.c
+++ b/lib/llist.c
@@ -82,6 +82,7 @@ void *dlist_lpop(void *list)
return v;
}
+// Append to list in-order (*list unchanged unless empty, ->prev is new node)
void dlist_add_nomalloc(struct double_list **list, struct double_list *new)
{
if (*list) {
@@ -92,7 +93,6 @@ void dlist_add_nomalloc(struct double_list **list, struct double_list *new)
} else *list = new->next = new->prev = new;
}
-
// Add an entry to the end of a doubly linked list
struct double_list *dlist_add(struct double_list **list, char *data)
{
diff --git a/toys/example/demo_utf8towc.c b/toys/example/demo_utf8towc.c
index 136be6ca..2c6050b1 100644
--- a/toys/example/demo_utf8towc.c
+++ b/toys/example/demo_utf8towc.c
@@ -19,8 +19,8 @@ void demo_utf8towc_main(void)
{
mbstate_t mb;
int len1, len2;
- unsigned u, h;
- wchar_t wc1, wc2;
+ unsigned u, h, wc2;
+ wchar_t wc1;
memset(&mb, 0, sizeof(mb));
for (u = 1; u<=0x10ffff; u++) {
diff --git a/toys/pending/vi.c b/toys/pending/vi.c
index da43d5d5..87c49d13 100644
--- a/toys/pending/vi.c
+++ b/toys/pending/vi.c
@@ -84,7 +84,7 @@ static const char *specials = ",.:;=-+*/(){}<>[]!@#$%^&|\\?\"\'";
//get utf8 length and width at same time
static int utf8_lnw(int *width, char *s, int bytes)
{
- wchar_t wc;
+ unsigned wc;
int length = 1;
if (*s == '\t') *width = TT.tabstop;
@@ -1312,10 +1312,9 @@ static int crunch_nstr(char **str, int width, int n, FILE *out, char *escmore,
{
int columns = 0, col, bytes;
char *start, *end;
+ unsigned wc;
for (end = start = *str; *end && n>0; columns += col, end += bytes, n -= bytes) {
- wchar_t wc;
-
if ((bytes = utf8towc(&wc, end, 4))>0 && (col = wcwidth(wc))>=0) {
if (!escmore || wc>255 || !strchr(escmore, wc)) {
if (width-columns<col) break;
diff --git a/toys/posix/cut.c b/toys/posix/cut.c
index 61b2b409..6a295846 100644
--- a/toys/posix/cut.c
+++ b/toys/posix/cut.c
@@ -85,7 +85,7 @@ static void cut_line(char **pline, long len)
count = ss-s;
} else if (toys.optflags&FLAG_c) {
- wchar_t wc;
+ unsigned wc;
char *sss;
// Find start
diff --git a/toys/posix/expand.c b/toys/posix/expand.c
index f3cd44d0..e15d30d3 100644
--- a/toys/posix/expand.c
+++ b/toys/posix/expand.c
@@ -43,7 +43,7 @@ static void do_expand(int fd, char *name)
}
if (!len) break;
for (i=0; i<len; i++) {
- wchar_t blah;
+ unsigned blah;
int width = utf8towc(&blah, toybuf+i, len-i);
char c;
diff --git a/toys/posix/file.c b/toys/posix/file.c
index 0f8b5314..f7a41569 100644
--- a/toys/posix/file.c
+++ b/toys/posix/file.c
@@ -196,12 +196,12 @@ bad:
static void do_regular_file(int fd, char *name)
{
- char *s;
+ char *s = toybuf;
unsigned len, magic;
// zero through elf shnum, just in case
- memset(toybuf, 0, 80);
- if ((len = readall(fd, s = toybuf, sizeof(toybuf)))<0) perror_msg("%s", name);
+ memset(s, 0, 80);
+ if ((len = readall(fd, s, sizeof(toybuf)-8))<0) perror_msg("%s", name);
if (!len) xputs("empty");
// 45 bytes: https://www.muppetlabs.com/~breadbox/software/tiny/teensy.html
@@ -235,7 +235,7 @@ static void do_regular_file(int fd, char *name)
s-3, (int)peek_le(s, 2), (int)peek_le(s+2, 2));
// TODO: parsing JPEG for width/height is harder than GIF or PNG.
- else if (len>32 && !memcmp(toybuf, "\xff\xd8", 2)) xputs("JPEG image data");
+ else if (len>32 && !memcmp(s, "\xff\xd8", 2)) xputs("JPEG image data");
// https://en.wikipedia.org/wiki/Java_class_file#General_layout
else if (len>8 && strstart(&s, "\xca\xfe\xba\xbe"))
@@ -252,9 +252,9 @@ static void do_regular_file(int fd, char *name)
else if (len>85 && strstart(&s, "07070")) {
char *cpioformat = "unknown type";
- if (toybuf[5] == '7') cpioformat = "pre-SVR4 or odc";
- else if (toybuf[5] == '1') cpioformat = "SVR4 with no CRC";
- else if (toybuf[5] == '2') cpioformat = "SVR4 with CRC";
+ if (*s == '7') cpioformat = "pre-SVR4 or odc";
+ else if (*s == '1') cpioformat = "SVR4 with no CRC";
+ else if (*s == '2') cpioformat = "SVR4 with CRC";
xprintf("ASCII cpio archive (%s)\n", cpioformat);
} else if (len>33 && ((magic=peek(&s,2))==0143561 || magic==070707)) {
if (magic == 0143561) printf("byte-swapped ");
@@ -265,16 +265,12 @@ static void do_regular_file(int fd, char *name)
(s[262]!=' ' || s[263]!=' ')?"":" (GNU)");
// zip/jar/apk archive, ODF/OOXML document, or such
else if (len>5 && strstart(&s, "PK\03\04")) {
- int ver = toybuf[4];
-
xprintf("Zip archive data");
- if (ver) xprintf(", requires at least v%d.%d to extract", ver/10, ver%10);
+ if (*s) xprintf(", requires at least v%d.%d to extract", *s/10, *s%10);
xputc('\n');
} else if (len>9 && strstart(&s, "7z\xbc\xaf\x27\x1c")) {
- int ver = toybuf[6]*10+toybuf[7];
-
xprintf("7-zip archive data");
- if (ver) xprintf(", version %d.%d", ver/10, ver%10);
+ if (*s || s[1]) xprintf(", version %d.%d", *s, s[1]);
xputc('\n');
} else if (len>4 && strstart(&s, "BZh") && isdigit(*s))
xprintf("bzip2 compressed data, block size = %c00k\n", *s);
@@ -410,13 +406,14 @@ static void do_regular_file(int fd, char *name)
// Whitespace is allowed between the #! and the interpreter
while (isspace(*s)) s++;
if (strstart(&s, "/usr/bin/env")) while (isspace(*s)) s++;
- for (what = s; (s-toybuf)<len && !isspace(*s); s++);
+ for (what = s; *s && !isspace(*s); s++);
strcpy(s, " script");
// Distinguish ASCII text, UTF-8 text, or data
} else for (i = 0; i<len; ++i) {
- if (!(isprint(toybuf[i]) || isspace(toybuf[i]))) {
- wchar_t wc;
+ if (!(isprint(s[i]) || isspace(s[i]))) {
+ unsigned wc;
+
if ((bytes = utf8towc(&wc, s+i, len-i))>0 && wcwidth(wc)>=0) {
i += bytes-1;
if (!what) what = "UTF-8 text";
diff --git a/toys/posix/grep.c b/toys/posix/grep.c
index 52d10139..8eb3c03a 100644
--- a/toys/posix/grep.c
+++ b/toys/posix/grep.c
@@ -124,7 +124,7 @@ static void do_grep(int fd, char *name)
if (!FLAG(a) && !lseek(fd, 0, SEEK_CUR)) {
char buf[256];
int len, i = 0;
- wchar_t wc;
+ unsigned wc;
// If the first 256 bytes don't parse as utf8, call it binary.
if (0<(len = read(fd, buf, 256))) {
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 910e4690..118e7750 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -74,6 +74,7 @@ static void do_wc(int fd, char *name)
for (;;) {
int pos, done = 0, len2 = read(fd, toybuf+len, sizeof(toybuf)-len);
+ unsigned wchar;
if (len2<0) perror_msg_raw(name);
else len += len2;
@@ -85,8 +86,6 @@ static void do_wc(int fd, char *name)
if (FLAG(m)) {
// If we've consumed next wide char
if (--clen<1) {
- wchar_t wchar;
-
// next wide size, don't count invalid, fetch more data if necessary
clen = utf8towc(&wchar, toybuf+pos, len-pos);
if (clen == -1) continue;