diff options
| author | Felix Janda <felix.janda@posteo.de> | 2012-11-08 11:19:07 -0600 | 
|---|---|---|
| committer | Felix Janda <felix.janda@posteo.de> | 2012-11-08 11:19:07 -0600 | 
| commit | abb8ca2455f3efd6f8f0eed78c54829bf0a9001e (patch) | |
| tree | f65809ecc156e251b65684530073bf3ea87ccb11 | |
| parent | e5138f409785757aa7d77d558b4bf0d9f01a6176 (diff) | |
| download | toybox-abb8ca2455f3efd6f8f0eed78c54829bf0a9001e.tar.gz | |
wc -m only cares about counting characters. Attached is an attempt at implementing it, along with some test cases. The test cases only apply to UTF-8 locales.
| -rwxr-xr-x[-rw-r--r--] | scripts/test/wc.test | 26 | ||||
| -rw-r--r-- | toys.h | 3 | ||||
| -rw-r--r-- | toys/posix/wc.c | 36 | 
3 files changed, 56 insertions, 9 deletions
| diff --git a/scripts/test/wc.test b/scripts/test/wc.test index 1b9a051c..801cd713 100644..100755 --- a/scripts/test/wc.test +++ b/scripts/test/wc.test @@ -18,5 +18,29 @@ testing "wc -c" "wc -c file1" "26 file1\n" "" ""  testing "wc -l" "wc -l file1" "4 file1\n" "" ""  testing "wc -w" "wc -w file1" "5 file1\n" "" ""  testing "wc format" "wc file1" "4 5 26 file1\n" "" "" -testing "wc multiple files" "wc input - file1" "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b" +testing "wc multiple files" "wc input - file1" \ +        "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b" + +#Tests for wc -m +if printf "%s" "$LANG" | grep -q UTF-8 +then + +printf " " > file1 +for i in $(seq 1 8192) +do +  printf "ü" >> file1 +done +testing "wc -m" "wc -m file1" "8193 file1\n" "" "" +printf " " > file1 +for i in $(seq 1 8192) +do +  printf "ü" >> file1 +done +testing "wc -m (invalid chars)" "wc -m file1" "8193 file1\n" "" "" +testing "wc -mlw" "wc -mlw input" "1 2 11 input\n" "hello, 世界!\n" "" + +else +printf "skipping tests for wc -m" +fi +  rm file1 @@ -16,6 +16,7 @@  #include <inttypes.h>  #include <limits.h>  #include <libgen.h> +#include <locale.h>  #include <math.h>  #include <pty.h>  #include <pwd.h> @@ -46,6 +47,8 @@  #include <unistd.h>  #include <utime.h>  #include <utmpx.h> +#include <wchar.h> +#include <wctype.h>  #include "lib/lib.h"  #include "toys/e2fs.h" diff --git a/toys/posix/wc.c b/toys/posix/wc.c index 7a5e5a30..3896b73a 100644 --- a/toys/posix/wc.c +++ b/toys/posix/wc.c @@ -6,22 +6,24 @@   *   * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html -USE_WC(NEWTOY(wc, "cwl", TOYFLAG_USR|TOYFLAG_BIN)) +USE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN))  config WC  	bool "wc"  	default y  	help -	  usage: wc -lwc [FILE...] +	  usage: wc -lwcm [FILE...]  	  Count lines, words, and characters in input.  	  
-l	show lines  	  -w	show words -	  -c	show characters +	  -c	show bytes +	  -m	show characters -	  By default outputs lines, words, characters, and filename for each -	  argument (or from stdin if none). +	  By default outputs lines, words, bytes, and filename for each +	  argument (or from stdin if none). Displays only either bytes +	  or characters.  */  #define FOR_wc @@ -47,7 +49,8 @@ static void show_lengths(unsigned long *lengths, char *name)  static void do_wc(int fd, char *name)  { -	int i, len; +	int i, len, clen=1, space; +	wchar_t wchar;  	unsigned long word=0, lengths[]={0,0,0};  	for (;;) { @@ -57,9 +60,24 @@ static void do_wc(int fd, char *name)  			toys.exitval = EXIT_FAILURE;  		}  		if (len<1) break; -		for (i=0; i<len; i++) { +		for (i=0; i<len; i+=clen) { +			if(toys.optflags&8) { +				clen = mbrtowc(&wchar, toybuf+i, len-i, 0); +				if(clen==(size_t)(-1)) { +					if(i!=len-1) { +						clen = 1; +						continue; +					} +					else break; +				} +				if(clen==(size_t)(-2)) break; +				if(clen==0) clen=1; +				space = iswspace(wchar); +			} +			else space = isspace(toybuf[i]); +  			if (toybuf[i]==10) lengths[0]++; -			if (isspace(toybuf[i])) word=0; +			if (space) word=0;  			else {  				if (!word) lengths[1]++;  				word=1; @@ -73,6 +91,8 @@ static void do_wc(int fd, char *name)  void wc_main(void)  { +	setlocale(LC_ALL, ""); +	toys.optflags |= (toys.optflags&8)>>1;  	loopfiles(toys.optargs, do_wc);  	if (toys.optc>1) show_lengths(TT.totals, "total");  } | 
