about summary refs log tree commit diff
path: root/toys/posix/wc.c
diff options
context:
space:
mode:
author: Rob Landley <rob@landley.net> 2012-11-28 03:12:02 -0600
committer: Rob Landley <rob@landley.net> 2012-11-28 03:12:02 -0600
commit: a9464f41539f4800744b4185c6a4d4c5e38eeadf (patch)
tree: 89cec6fdf3ac51f7dc3ffb1011daf277a88a9141 /toys/posix/wc.c
parent: a03f3e120c96ed58e6913dae3d21bdea59390c2b (diff)
download: toybox-a9464f41539f4800744b4185c6a4d4c5e38eeadf.tar.gz
The previous wc -m didn't handle multibyte characters that crossed a buffer boundary, so take a guess at making that work. (I haven't got a test case for this. I also don't know how to handle invalid sequences so just don't count them.)
Diffstat (limited to 'toys/posix/wc.c')
-rw-r--r-- toys/posix/wc.c 28
1 files changed, 17 insertions, 11 deletions
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index d2eb306d..d6029b6e 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -51,24 +51,30 @@ static void do_wc(int fd, char *name)
unsigned long word=0, lengths[]={0,0,0};
for (;;) {
- len = read(fd, toybuf, sizeof(toybuf));
- if (len<0) {
+ i = 0;
+again:
+ len = i+read(fd, toybuf+i, sizeof(toybuf)-i);
+ if (len < i) {
perror_msg("%s",name);
- toys.exitval = EXIT_FAILURE;
+ toys.exitval = 1;
}
- if (len<1) break;
+ if (!len) break;
for (i=0; i<len; i+=clen) {
- wchar_t wchar;
if (CFG_TOYBOX_I18N && (toys.optflags&FLAG_m)) {
+ wchar_t wchar = 0;
+
clen = mbrtowc(&wchar, toybuf+i, len-i, 0);
- if(clen==(size_t)(-1)) {
- if(i!=len-1) {
- clen = 1;
+ if (clen < 1) {
+ // If the problem might be buffer wrap, move and read more data
+ if (i) {
+ memmove(toybuf, toybuf+i, sizeof(toybuf)-i);
+ i = len - i;
+ goto again;
+ } else {
+ clen=1;
continue;
- } else break;
+ }
}
- if(clen==(size_t)(-2)) break;
- if(clen==0) clen=1;
space = iswspace(wchar);
} else space = isspace(toybuf[i]);