diff options
author | Rob Landley <rob@landley.net> | 2012-11-28 03:12:02 -0600 |
---|---|---|
committer | Rob Landley <rob@landley.net> | 2012-11-28 03:12:02 -0600 |
commit | a9464f41539f4800744b4185c6a4d4c5e38eeadf (patch) | |
tree | 89cec6fdf3ac51f7dc3ffb1011daf277a88a9141 /toys | |
parent | a03f3e120c96ed58e6913dae3d21bdea59390c2b (diff) | |
download | toybox-a9464f41539f4800744b4185c6a4d4c5e38eeadf.tar.gz |
The previous wc -m didn't handle multibyte characters that crossed a buffer boundary, so take a guess at making that work. (I haven't got a test case for this. I also don't know how to handle invalid sequences, so just don't count them.)
Diffstat (limited to 'toys')
-rw-r--r-- | toys/posix/wc.c | 28 |
1 file changed, 17 insertions, 11 deletions
diff --git a/toys/posix/wc.c b/toys/posix/wc.c index d2eb306d..d6029b6e 100644 --- a/toys/posix/wc.c +++ b/toys/posix/wc.c @@ -51,24 +51,30 @@ static void do_wc(int fd, char *name) unsigned long word=0, lengths[]={0,0,0}; for (;;) { - len = read(fd, toybuf, sizeof(toybuf)); - if (len<0) { + i = 0; +again: + len = i+read(fd, toybuf+i, sizeof(toybuf)-i); + if (len < i) { perror_msg("%s",name); - toys.exitval = EXIT_FAILURE; + toys.exitval = 1; } - if (len<1) break; + if (!len) break; for (i=0; i<len; i+=clen) { - wchar_t wchar; if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) { + wchar_t wchar = 0; + clen = mbrtowc(&wchar, toybuf+i, len-i, 0); - if(clen==(size_t)(-1)) { - if(i!=len-1) { - clen = 1; + if (clen < 1) { + // If the problem might be buffer wrap, move and read more data + if (i) { + memmove(toybuf, toybuf+i, sizeof(toybuf)-i); + i = len - i; + goto again; + } else { + clen=1; continue; - } else break; + } } - if(clen==(size_t)(-2)) break; - if(clen==0) clen=1; space = iswspace(wchar); } else space = isspace(toybuf[i]); |