aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Felix Janda <felix.janda@posteo.de> 2012-11-08 11:19:07 -0600
committer Felix Janda <felix.janda@posteo.de> 2012-11-08 11:19:07 -0600
commit abb8ca2455f3efd6f8f0eed78c54829bf0a9001e (patch)
tree f65809ecc156e251b65684530073bf3ea87ccb11
parent e5138f409785757aa7d77d558b4bf0d9f01a6176 (diff)
download toybox-abb8ca2455f3efd6f8f0eed78c54829bf0a9001e.tar.gz
wc -m only cares about counting characters. Attached is an attempt at implementing it, along with some test cases for it. The test cases only run in UTF-8 locales.
-rwxr-xr-x[-rw-r--r--] scripts/test/wc.test 26
-rw-r--r-- toys.h 3
-rw-r--r-- toys/posix/wc.c 36
3 files changed, 56 insertions, 9 deletions
diff --git a/scripts/test/wc.test b/scripts/test/wc.test
index 1b9a051c..801cd713 100644..100755
--- a/scripts/test/wc.test
+++ b/scripts/test/wc.test
@@ -18,5 +18,29 @@ testing "wc -c" "wc -c file1" "26 file1\n" "" ""
testing "wc -l" "wc -l file1" "4 file1\n" "" ""
testing "wc -w" "wc -w file1" "5 file1\n" "" ""
testing "wc format" "wc file1" "4 5 26 file1\n" "" ""
-testing "wc multiple files" "wc input - file1" "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b"
+testing "wc multiple files" "wc input - file1" \
+ "1 2 3 input\n0 2 3 -\n4 5 26 file1\n5 9 32 total\n" "a\nb" "a b"
+
+#Tests for wc -m
+if printf "%s" "$LANG" | grep -q UTF-8
+then
+
+printf " " > file1
+for i in $(seq 1 8192)
+do
+ printf "ü" >> file1
+done
+testing "wc -m" "wc -m file1" "8193 file1\n" "" ""
+printf " " > file1
+for i in $(seq 1 8192)
+do
+ printf "ü" >> file1
+done
+testing "wc -m (invalid chars)" "wc -m file1" "8193 file1\n" "" ""
+testing "wc -mlw" "wc -mlw input" "1 2 11 input\n" "hello, 世界!\n" ""
+
+else
+printf "skipping tests for wc -m"
+fi
+
rm file1
diff --git a/toys.h b/toys.h
index 6963ae72..eb7c67f6 100644
--- a/toys.h
+++ b/toys.h
@@ -16,6 +16,7 @@
#include <inttypes.h>
#include <limits.h>
#include <libgen.h>
+#include <locale.h>
#include <math.h>
#include <pty.h>
#include <pwd.h>
@@ -46,6 +47,8 @@
#include <unistd.h>
#include <utime.h>
#include <utmpx.h>
+#include <wchar.h>
+#include <wctype.h>
#include "lib/lib.h"
#include "toys/e2fs.h"
diff --git a/toys/posix/wc.c b/toys/posix/wc.c
index 7a5e5a30..3896b73a 100644
--- a/toys/posix/wc.c
+++ b/toys/posix/wc.c
@@ -6,22 +6,24 @@
*
* See http://opengroup.org/onlinepubs/9699919799/utilities/wc.html
-USE_WC(NEWTOY(wc, "cwl", TOYFLAG_USR|TOYFLAG_BIN))
+USE_WC(NEWTOY(wc, "mcwl", TOYFLAG_USR|TOYFLAG_BIN))
config WC
bool "wc"
default y
help
- usage: wc -lwc [FILE...]
+ usage: wc -lwcm [FILE...]
Count lines, words, and characters in input.
-l show lines
-w show words
- -c show characters
+ -c show bytes
+ -m show characters
- By default outputs lines, words, characters, and filename for each
- argument (or from stdin if none).
+ By default outputs lines, words, bytes, and filename for each
+ argument (or from stdin if none). Displays only either bytes
+ or characters.
*/
#define FOR_wc
@@ -47,7 +49,8 @@ static void show_lengths(unsigned long *lengths, char *name)
static void do_wc(int fd, char *name)
{
- int i, len;
+ int i, len, clen=1, space;
+ wchar_t wchar;
unsigned long word=0, lengths[]={0,0,0};
for (;;) {
@@ -57,9 +60,24 @@ static void do_wc(int fd, char *name)
toys.exitval = EXIT_FAILURE;
}
if (len<1) break;
- for (i=0; i<len; i++) {
+ for (i=0; i<len; i+=clen) {
+ if(toys.optflags&8) {
+ clen = mbrtowc(&wchar, toybuf+i, len-i, 0);
+ if(clen==(size_t)(-1)) {
+ if(i!=len-1) {
+ clen = 1;
+ continue;
+ }
+ else break;
+ }
+ if(clen==(size_t)(-2)) break;
+ if(clen==0) clen=1;
+ space = iswspace(wchar);
+ }
+ else space = isspace(toybuf[i]);
+
if (toybuf[i]==10) lengths[0]++;
- if (isspace(toybuf[i])) word=0;
+ if (space) word=0;
else {
if (!word) lengths[1]++;
word=1;
@@ -73,6 +91,8 @@ static void do_wc(int fd, char *name)
void wc_main(void)
{
+ setlocale(LC_ALL, "");
+ toys.optflags |= (toys.optflags&8)>>1;
loopfiles(toys.optargs, do_wc);
if (toys.optc>1) show_lengths(TT.totals, "total");
}