/* vi: set sw=4 ts=4: */ /* * wc implementation for busybox * * Copyright (C) 2003 Manuel Novoa III <mjn3@codepoet.org> * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA * */ /* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */ /* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */ /* Mar 16, 2003 Manuel Novoa III (mjn3@codepoet.org) * * Rewritten to fix a number of problems and do some size optimizations. * Problems in the previous busybox implementation (besides bloat) included: * 1) broken 'wc -c' optimization (read note below) * 2) broken handling of '-' args * 3) no checking of ferror on EOF returns * 4) isprint() wasn't considered when word counting. * * TODO: * * When locale support is enabled, count multibyte chars in the '-m' case. * * NOTES: * * The previous busybox wc attempted an optimization using stat for the * case of counting chars only. I omitted that because it was broken. * It didn't take into account the possibility of input coming from a * pipe, or input from a file with file pointer not at the beginning. * * To implement such a speed optimization correctly, not only do you * need the size, but also the file position. Note also that the * file position may be past the end of file. Consider the example * (adapted from example in gnu wc.c) * * echo hello > /tmp/testfile && * (dd ibs=1k skip=1 count=0 &> /dev/null ; wc -c) < /tmp/testfile * * for which 'wc -c' should output '0'. */ #include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> #include "busybox.h" #ifdef CONFIG_LOCALE_SUPPORT #include <locale.h> #include <ctype.h> #define isspace_given_isprint(c) isspace(c) #else #undef isspace #undef isprint #define isspace(c) ((((c) == ' ') || (((unsigned int)((c) - 9)) <= (13 - 9)))) #define isprint(c) (((unsigned int)((c) - 0x20)) <= (0x7e - 0x20)) #define isspace_given_isprint(c) ((c) == ' ') #endif enum { WC_LINES = 0, WC_WORDS = 1, WC_CHARS = 2, WC_LENGTH = 3 }; /* Note: If this changes, remember to change the initialization of * 'name' in wc_main. It needs to point to the terminating nul. */ static const char wc_opts[] = "lwcL"; /* READ THE WARNING ABOVE! */ enum { OP_INC_LINE = 1, /* OP_INC_LINE must be 1. */ OP_SPACE = 2, OP_NEWLINE = 4, OP_TAB = 8, OP_NUL = 16, }; /* Note: If fmt_str changes, the offsets to 's' in the OUTPUT section * will need to be updated. */ static const char fmt_str[] = " %7u\0 %s\n"; static const char total_str[] = "total"; int wc_main(int argc, char **argv) { FILE *fp; const char *s; unsigned int *pcounts; unsigned int counts[4]; unsigned int totals[4]; unsigned int linepos; unsigned int u; int num_files = 0; int c; char status = EXIT_SUCCESS; char in_word; char print_type; print_type = bb_getopt_ulflags(argc, argv, wc_opts); if (print_type == 0) { print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS); } argv += optind; if (!*argv) { *--argv = (char *) bb_msg_standard_input; } memset(totals, 0, sizeof(totals)); pcounts = counts; do { ++num_files; if (!(fp = bb_wfopen_input(*argv))) { status = EXIT_FAILURE; continue; } memset(counts, 0, sizeof(counts)); linepos = 0; in_word = 0; do { ++counts[WC_CHARS]; c = getc(fp); if (isprint(c)) { ++linepos; if (!isspace_given_isprint(c)) { in_word = 1; continue; } } else if (((unsigned int)(c - 9)) <= 4) { /* \t 9 * \n 10 * \v 11 * \f 12 * \r 13 */ if (c == '\t') { linepos = (linepos | 7) + 1; } else { /* '\n', '\r', '\f', or '\v' */ DO_EOF: if (linepos > counts[WC_LENGTH]) { counts[WC_LENGTH] = linepos; } if (c == '\n') { ++counts[WC_LINES]; } if (c != '\v') { linepos = 0; } } } else if (c == EOF) { if (ferror(fp)) { bb_perror_msg("%s", *argv); status = EXIT_FAILURE; } --counts[WC_CHARS]; goto DO_EOF; /* Treat an EOF as '\r'. */ } else { continue; } counts[WC_WORDS] += in_word; in_word = 0; if (c == EOF) { break; } } while (1); if (totals[WC_LENGTH] < counts[WC_LENGTH]) { totals[WC_LENGTH] = counts[WC_LENGTH]; } totals[WC_LENGTH] -= counts[WC_LENGTH]; bb_fclose_nonstdin(fp); OUTPUT: s = fmt_str + 1; /* Skip the leading space on 1st pass. */ u = 0; do { if (print_type & (1 << u)) { bb_printf(s, pcounts[u]); s = fmt_str; /* Ok... restore the leading space. */ } totals[u] += pcounts[u]; } while (++u < 4); s += 8; /* Set the format to the empty string. */ if (*argv != bb_msg_standard_input) { s -= 3; /* We have a name, so do %s conversion. */ } bb_printf(s, *argv); } while (*++argv); /* If more than one file was processed, we want the totals. To save some * space, we set the pcounts ptr to the totals array. This has the side * effect of trashing the totals array after outputting it, but that's * irrelavent since we no longer need it. */ if (num_files > 1) { num_files = 0; /* Make sure we don't get here again. */ *--argv = (char *) total_str; pcounts = totals; goto OUTPUT; } bb_fflush_stdout_and_exit(status); }