aboutsummaryrefslogtreecommitdiff
path: root/coreutils/wc.c
blob: 4aea7d8260a37252ad91a550ab59c4b47c66a399 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
/* vi: set sw=4 ts=4: */
/*
 * wc implementation for busybox
 *
 * Copyright (C) 2003  Manuel Novoa III  <mjn3@codepoet.org>
 *
 * Licensed under GPLv2 or later, see file LICENSE in this tarball for details.
 */

/* BB_AUDIT SUSv3 _NOT_ compliant -- option -m is not currently supported. */
/* http://www.opengroup.org/onlinepubs/007904975/utilities/wc.html */

/* Mar 16, 2003      Manuel Novoa III   (mjn3@codepoet.org)
 *
 * Rewritten to fix a number of problems and do some size optimizations.
 * Problems in the previous busybox implementation (besides bloat) included:
 *  1) broken 'wc -c' optimization (read note below)
 *  2) broken handling of '-' args
 *  3) no checking of ferror on EOF returns
 *  4) isprint() wasn't considered when word counting.
 *
 * TODO:
 *
 * When locale support is enabled, count multibyte chars in the '-m' case.
 *
 * NOTES:
 *
 * The previous busybox wc attempted an optimization using stat for the
 * case of counting chars only.  I omitted that because it was broken.
 * It didn't take into account the possibility of input coming from a
 * pipe, or input from a file with file pointer not at the beginning.
 *
 * To implement such a speed optimization correctly, not only do you
 * need the size, but also the file position.  Note also that the
 * file position may be past the end of file.  Consider the example
 * (adapted from example in gnu wc.c)
 *
 *      echo hello > /tmp/testfile &&
 *      (dd ibs=1k skip=1 count=0 &> /dev/null ; wc -c) < /tmp/testfile
 *
 * for which 'wc -c' should output '0'.
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include "busybox.h"

/*
 * Character classification helpers.
 *
 * With CONFIG_LOCALE_SUPPORT the <ctype.h> versions of isspace()/isprint()
 * are used as-is.  Otherwise any existing definitions are dropped and
 * replaced by fixed range tests:
 *   isspace(c): c is ' ' or one of the codes 9..13 (\t \n \v \f \r)
 *   isprint(c): c is in 0x20..0x7e
 * The casts to unsigned turn each two-sided range test into a single
 * comparison; values below the range (including a negative EOF) wrap to a
 * large unsigned number and fail the test.
 *
 * isspace_given_isprint(c) is the whitespace test to apply to a character
 * that has already passed isprint(); in the non-locale case the only such
 * character is ' '.
 */
#ifdef CONFIG_LOCALE_SUPPORT
#include <locale.h>
#include <ctype.h>
#define isspace_given_isprint(c) isspace(c)
#else
#undef isspace
#undef isprint
#define isspace(c) (((c) == ' ') || ((unsigned int)((c) - 9) < 5u))
#define isprint(c) ((unsigned int)((c) - 0x20) < 0x5fu)
#define isspace_given_isprint(c) ((c) == ' ')
#endif

/* Counter slots.  Each value is an index into the counts[]/totals[] arrays
 * in wc_main and also the bit position tested in print_type there
 * (print_type & (1 << WC_xxx)).  The values must stay 0..3 in this order. */
enum {
	WC_LINES,	/* 0: number of '\n' bytes seen */
	WC_WORDS,	/* 1: number of words */
	WC_CHARS,	/* 2: number of bytes read */
	WC_LENGTH	/* 3: longest line, in display columns */
};

/* Option letters accepted by wc.  wc_main passes this string to
 * bb_getopt_ulflags() and tests the result with (1 << WC_xxx), so the
 * letter order presumably has to match the WC_* enum order (l, w, c, L)
 * -- confirm against bb_getopt_ulflags().
 *
 * Note: If this changes, remember to change the initialization of
 *       'name' in wc_main.  It needs to point to the terminating nul.
 *       NOTE(review): wc_main declares no 'name' variable, so this
 *       warning looks stale -- verify before relying on it. */
static const char wc_opts[] = "lwcL";	/* READ THE WARNING ABOVE! */

/* Single-bit operation flags.
 * NOTE(review): none of these are referenced in the visible part of this
 * file -- possibly left over from an earlier implementation; confirm
 * before removing. */
enum {
	OP_INC_LINE	= 1 << 0, /* OP_INC_LINE must be 1. */
	OP_SPACE	= 1 << 1,
	OP_NEWLINE	= 1 << 2,
	OP_TAB		= 1 << 3,
	OP_NUL		= 1 << 4,
};

/* Two printf formats packed into one array, separated by an embedded nul:
 *
 *   offset 0: " %7u"    one count, with a leading separator space
 *   offset 1: "%7u"     same, without the space (first count on a row)
 *   offset 5: " %s\n"   the name that ends a row
 *   offset 8: "\n"      ends a row that has no name
 *
 * Note: the OUTPUT section of wc_main hard-codes these offsets
 *       (fmt_str + 1, s += 8, s -= 3).  If fmt_str changes, they
 *       must be updated to match. */
static const char fmt_str[] =
	" %7u" "\0"
	" %s\n";

/* Label printed in place of a file name on the totals row. */
static const char total_str[] = "total";

/*
 * wc applet entry point.
 *
 * Parses the options listed in wc_opts, then for every remaining argument
 * (or a single bb_msg_standard_input pseudo-name when there are none)
 * reads the input byte by byte and accumulates four counters indexed by
 * WC_*: newlines, words, bytes and the longest line in display columns.
 * After each input the counters selected by the options are printed,
 * followed by the name.  When more than one argument was processed, an
 * extra "total" row is printed at the end.
 *
 * Counting rules, as implemented below:
 *  - A word is a run that contains at least one printable non-space
 *    character; it ends at a space, at one of the codes 9..13, or at EOF.
 *  - Bytes that are neither printable nor one of the codes 9..13 are
 *    counted as chars only: they do not affect word state or line length.
 *  - Line length advances by one per printable character and to the next
 *    multiple of 8 on a tab.
 *
 * The function has no return statement: it finishes by calling
 * bb_fflush_stdout_and_exit(status) (presumably noreturn -- confirm in
 * libbb).  status is EXIT_FAILURE if any input failed to open or hit a
 * read error, otherwise EXIT_SUCCESS.
 */
int wc_main(int argc, char **argv)
{
	FILE *fp;
	/* Current printf format; always points somewhere inside fmt_str. */
	const char *s;
	/* Array printed by the OUTPUT section: counts, or totals on the last pass. */
	unsigned int *pcounts;
	/* Per-input counters and running totals, both indexed by WC_*. */
	unsigned int counts[4];
	unsigned int totals[4];
	/* Display column reached on the current line. */
	unsigned int linepos;
	unsigned int u;
	/* Number of arguments visited, including ones that failed to open. */
	int num_files = 0;
	int c;
	char status = EXIT_SUCCESS;
	/* 1 once a printable non-space character has been seen in the current word. */
	char in_word;
	/* Bitmask of (1 << WC_*) selecting which counters get printed. */
	char print_type;

	print_type = bb_getopt_ulflags(argc, argv, wc_opts);

	/* No option given: default to lines, words and chars. */
	if (print_type == 0) {
		print_type = (1 << WC_LINES) | (1 << WC_WORDS) | (1 << WC_CHARS);
	}

	argv += optind;
	/* No operands: overwrite the argv slot just before the terminating NULL
	 * with the bb_msg_standard_input pointer so the loop below runs once.
	 * The same pointer is compared against later to suppress the name. */
	if (!*argv) {
		*--argv = (char *) bb_msg_standard_input;
	}

	memset(totals, 0, sizeof(totals));

	pcounts = counts;

	do {
		++num_files;
		if (!(fp = bb_wfopen_input(*argv))) {
			status = EXIT_FAILURE;
			/* In a do-while, continue jumps to the loop condition: move on
			 * to the next argument without printing a row for this one. */
			continue;
		}

		memset(counts, 0, sizeof(counts));
		linepos = 0;
		in_word = 0;

		do {
			/* Count the byte up front; undone below if getc() returns EOF. */
			++counts[WC_CHARS];
			c = getc(fp);
			if (isprint(c)) {
				++linepos;
				if (!isspace_given_isprint(c)) {
					/* Printable non-space: we are inside a word. */
					in_word = 1;
					continue;
				}
				/* Printable space: fall through to end the current word. */
			} else if (((unsigned int)(c - 9)) <= 4) {
				/* \t  9
				 * \n 10
				 * \v 11
				 * \f 12
				 * \r 13
				 */
				if (c == '\t') {
					/* Advance to the next multiple of 8. */
					linepos = (linepos | 7) + 1;
				} else {			/* '\n', '\r', '\f', or '\v' */
				/* Also entered by goto from the EOF branch with c == EOF. */
				DO_EOF:
					/* Record the longest line seen so far. */
					if (linepos > counts[WC_LENGTH]) {
						counts[WC_LENGTH] = linepos;
					}
					if (c == '\n') {
						++counts[WC_LINES];
					}
					/* Everything that reaches here except '\v' resets the column. */
					if (c != '\v') {
						linepos = 0;
					}
				}
			} else if (c == EOF) {
				/* getc() returns EOF for both end-of-file and read errors. */
				if (ferror(fp)) {
					bb_perror_msg("%s", *argv);
					status = EXIT_FAILURE;
				}
				/* EOF is not a byte: take back the increment made above. */
				--counts[WC_CHARS];
				goto DO_EOF;		/* Treat an EOF as '\r'. */
			} else {
				/* Neither printable nor one of 9..13: counted as a char
				 * only, with no effect on word state or line length. */
				continue;
			}

			/* Reached for space, tab, the line-ending controls and EOF:
			 * close the current word, if any. */
			counts[WC_WORDS] += in_word;
			in_word = 0;
			if (c == EOF) {
				break;
			}
		} while (1);

		/* The total for WC_LENGTH is a maximum, not a sum.  Raise it to at
		 * least this input's value, then subtract that value so that the
		 * unconditional "totals[u] += pcounts[u]" in the OUTPUT loop below
		 * brings it back to the maximum. */
		if (totals[WC_LENGTH] < counts[WC_LENGTH]) {
			totals[WC_LENGTH] = counts[WC_LENGTH];
		}
		totals[WC_LENGTH] -= counts[WC_LENGTH];

		bb_fclose_nonstdin(fp);

	/* Also the target of the goto at the end of the function, which
	 * re-enters this loop body with pcounts == totals and *argv pointing
	 * at total_str in order to print the totals row. */
	OUTPUT:
		s = fmt_str + 1;			/* Skip the leading space on 1st pass. */
		u = 0;
		do {
			if (print_type & (1 << u)) {
				bb_printf(s, pcounts[u]);
				s = fmt_str;		/* Ok... restore the leading space. */
			}
			/* Accumulate every counter, whether it was printed or not. */
			totals[u] += pcounts[u];
		} while (++u < 4);

		/* s == fmt_str once at least one count has been printed, so s + 8
		 * is the trailing "\n" of fmt_str: end the row without a name. */
		s += 8;						/* Point at the final "\n" (no name). */

		if (*argv != bb_msg_standard_input) {
			s -= 3;					/* We have a name, so do %s conversion. */
		}
		bb_printf(s, *argv);

	} while (*++argv);

	/* If more than one file was processed, we want the totals.  To save some
	 * space, we set the pcounts ptr to the totals array.  This has the side
	 * effect of trashing the totals array after outputting it (the OUTPUT
	 * loop adds each entry to itself), but that's irrelevant since we no
	 * longer need it.
	 *
	 * argv is stepped back onto the last operand, which is overwritten with
	 * the "total" label; the "*++argv" at the bottom of the loop then lands
	 * on the terminating NULL again and the loop exits. */
	if (num_files > 1) {
		num_files = 0;				/* Make sure we don't get here again. */
		*--argv = (char *) total_str;
		pcounts = totals;
		goto OUTPUT;
	}

	bb_fflush_stdout_and_exit(status);
}