From abb8ca2455f3efd6f8f0eed78c54829bf0a9001e Mon Sep 17 00:00:00 2001 From: Felix Janda Date: Thu, 8 Nov 2012 11:19:07 -0600 Subject: wc -m only cares about counting characters. Attached is a try on implementing it and some test cases for it. The test cases are only for UTF-8 locales. --- scripts/test/wc.test | 26 +++++++++++++++++++++++++- toys.h | 3 +++ toys/posix/wc.c | 36 ++++++++++++++++++++++++++++-------- 3 files changed, 56 insertions(+), 9 deletions(-) mode change 100644 => 100755 scripts/test/wc.test diff --git a/scripts/test/wc.test b/scripts/test/wc.test old mode 100644 new mode 100755 index 1b9a051c..801cd713 --- a/scripts/test/wc.test +++ b/scripts/test/wc.test @@ -18,5 +18,29 @@ testing "wc -c" "wc -c file1" "26 file1\n" "" "" testing "wc -l" "wc -l file1" "4 file1\n" "" "" testing "wc -w" "wc -w file1" "5 file1\n" "" "" testing "wc format" "wc file1" "4 5 26 file1\n" "" "" -testing "wc multiple files" "wc input - file1" "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b" +testing "wc multiple files" "wc input - file1" \ + "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b" + +#Tests for wc -m +if printf "%s" "$LANG" | grep -q UTF-8 +then + +printf " " > file1 +for i in $(seq 1 8192) +do + printf "ü" >> file1 +done +testing "wc -m" "wc -m file1" "8193 file1\n" "" "" +printf " " > file1 +for i in $(seq 1 8192) +do + printf "ü" >> file1 +done +testing "wc -m (invalid chars)" "wc -m file1" "8193 file1\n" "" "" +testing "wc -mlw" "wc -mlw input" "1 2 11 input\n" "hello, 世界!\n" "" + +else +printf "skipping tests for wc -m" +fi + rm file1 diff --git a/toys.h b/toys.h index 6963ae72..eb7c67f6 100644 --- a/toys.h +++ b/toys.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -46,6 +47,8 @@ #include #include #include +#include +#include #include "lib/lib.h" #include "toys/e2fs.h" diff --git a/toys/posix/wc.c b/toys/posix/wc.c index 7a5e5a30..3896b73a 100644 --- a/toys/posix/wc.c +++ b/toys/posix/wc.c @@ -6,22 +6,24 @@ * * See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html -USE_WC(NEWTOY(wc, "cwl", TOYFLAG_USR|TOYFLAG_BIN)) +USE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN)) config WC bool "wc" default y help - usage: wc -lwc [FILE...] + usage: wc -lwcm [FILE...] Count lines, words, and characters in input. -l show lines -w show words - -c show characters + -c show bytes + -m show characters - By default outputs lines, words, characters, and filename for each - argument (or from stdin if none). + By default outputs lines, words, bytes, and filename for each + argument (or from stdin if none). Displays only either bytes + or characters. */ #define FOR_wc @@ -47,7 +49,8 @@ static void show_lengths(unsigned long *lengths, char *name) static void do_wc(int fd, char *name) { - int i, len; + int i, len, clen=1, space; + wchar_t wchar; unsigned long word=0, lengths[]={0,0,0}; for (;;) { @@ -57,9 +60,24 @@ static void do_wc(int fd, char *name) toys.exitval = EXIT_FAILURE; } if (len<1) break; - for (i=0; i>1; loopfiles(toys.optargs, do_wc); if (toys.optc>1) show_lengths(TT.totals, "total"); } -- cgit v1.2.3