From afc7b4c0d83ab82eb20cedae5d9676c3a21af4bf Mon Sep 17 00:00:00 2001
From: Denys Vlasenko
Date: Mon, 4 Oct 2010 17:08:14 +0200
Subject: wc: add support for -m

function old new delta
wc_main 601 637 +36
packed_usage 27357 27358 +1

Signed-off-by: Denys Vlasenko
---
 coreutils/wc.c | 58 +++++++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 13 deletions(-)

(limited to 'coreutils/wc.c')

diff --git a/coreutils/wc.c b/coreutils/wc.c
index ae38fd5fe..ecadae59b 100644
--- a/coreutils/wc.c
+++ b/coreutils/wc.c
@@ -7,7 +7,7 @@
  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
  */
 
-/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
+/* BB_AUDIT SUSv3 compliant. */
 /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */
 
 /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org)
@@ -19,10 +19,6 @@
  * 3) no checking of ferror on EOF returns
  * 4) isprint() wasn't considered when word counting.
  *
- * TODO:
- *
- * When locale support is enabled, count multibyte chars in the '-m' case.
- *
  * NOTES:
  *
  * The previous busybox wc attempted an optimization using stat for the
@@ -40,8 +36,8 @@
  *
  * for which 'wc -c' should output '0'.
  */
-
 #include "libbb.h"
+#include "unicode.h"
 
 #if !ENABLE_LOCALE_SUPPORT
 # undef isprint
@@ -58,12 +54,39 @@
 # define COUNT_FMT "u"
 #endif
 
+/* We support -m even when UNICODE_SUPPORT is off,
+ * we just don't advertise it in help text,
+ * since it is the same as -c in this case.
+ */
+
+//usage:#define wc_trivial_usage
+//usage: "[-c"IF_UNICODE_SUPPORT("m")"lwL] [FILE]..."
+//usage:
+//usage:#define wc_full_usage "\n\n"
+//usage: "Count lines, words, and bytes for each FILE (or stdin)\n"
+//usage: "\nOptions:"
+//usage: "\n -c Count bytes"
+//usage: IF_UNICODE_SUPPORT(
+//usage: "\n -m Count characters"
+//usage: )
+//usage: "\n -l Count newlines"
+//usage: "\n -w Count words"
+//usage: "\n -L Print longest line length"
+//usage:
+//usage:#define wc_example_usage
+//usage: "$ wc /etc/passwd\n"
+//usage: " 31 46 1365 /etc/passwd\n"
+
+/* Order is important if we want to be compatible with
+ * column order in "wc -cmlwL" output:
+ */
 enum {
-	WC_LINES = 0,
-	WC_WORDS = 1,
-	WC_CHARS = 2,
-	WC_LENGTH = 3,
-	NUM_WCS = 4,
+	WC_LINES = 0,
+	WC_WORDS = 1,
+	WC_UNICHARS = 2,
+	WC_CHARS = 3,
+	WC_LENGTH = 4,
+	NUM_WCS = 5,
 };
 
 int wc_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
@@ -79,7 +102,9 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
 	smallint status = EXIT_SUCCESS;
 	unsigned print_type;
 
-	print_type = getopt32(argv, "lwcL");
+	init_unicode();
+
+	print_type = getopt32(argv, "lwcmL");
 
 	if (print_type == 0) {
 		print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
@@ -130,9 +155,16 @@ int wc_main(int argc UNUSED_PARAM, char **argv)
 				}
 				goto DO_EOF; /* Treat an EOF as '\r'. */
 			}
+
+			/* Cater for -c and -m */
 			++counts[WC_CHARS];
+			if (unicode_status != UNICODE_ON /* every byte is a new char */
+			 || (c & 0xc0) != 0x80 /* it isn't a 2nd+ byte of a Unicode char */
+			) {
+				++counts[WC_UNICHARS];
+			}
 
-			if (isprint_asciionly(c)) {
+			if (isprint_asciionly(c)) { /* FIXME: not unicode-aware */
 				++linepos;
 				if (!isspace(c)) {
 					in_word = 1;
-- 
cgit v1.2.3