aboutsummaryrefslogtreecommitdiff
path: root/toys/uniq.c
blob: c16c08f104c3d67f9d3725ac704be1d89dce3b7f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
/* vi: set sw=4 ts=4:
 *
 * uniq.c - report or filter out repeated lines in a file
 *
 * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
 *
 * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html

USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))

config UNIQ
	bool "uniq"
	default y
	help
	  usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]

	  Report or filter out repeated lines in a file

	  -c	show counts before each line
	  -d	show only lines that are repeated
	  -u	show only lines that are unique
	  -i	ignore case when comparing lines
	  -z	lines end with \0 not \n
	  -w	compare maximum X chars per line
	  -f	ignore first X fields
	  -s	ignore first X chars
*/

#include "toys.h"

// State for this command; the code below reads and writes it as TT.<field>.
// NOTE(review): the first three presumably receive the -w, -s and -f option
// arguments from the toybox option parser - confirm against the NEWTOY string.
DEFINE_GLOBALS(
	long maxchars; // limit given to strncmp()/strncasecmp(); 0 = compare whole lines
	long nchars;   // leading characters skip() steps over (after any fields)
	long nfields;  // leading fields skip() steps over
	long repeats;  // extra consecutive lines that matched the line currently held
)

// Shorthand for this command's global state.
#define TT this.uniq

// Bit masks tested against toys.optflags.
// NOTE(review): values presumably mirror the order of the "zicdu" flags in the
// NEWTOY option string (rightmost flag = bit 0) - confirm before reordering.
#define FLAG_z 16
#define FLAG_i 8
#define FLAG_c 4
#define FLAG_d 2
#define FLAG_u 1

/*
 * Return a pointer into str just past the prefix that is ignored when
 * comparing lines: first TT.nfields fields, then TT.nchars characters.
 *
 * A field is a (possibly empty) run of whitespace followed by a run of
 * non-whitespace. The returned pointer never moves beyond the string's
 * terminating NUL, so a line shorter than the requested prefix yields a
 * pointer to its end. Zero or negative counts skip nothing.
 */
static char *skip(char *str)
{
	long nchars = TT.nchars, nfields;

	// Skip fields first. The character that ends a field is deliberately not
	// consumed: it may be the terminating NUL (stepping over it would read
	// out of bounds), and otherwise it is the blank that opens the next field
	// and the first character the char count below applies to.
	for (nfields = TT.nfields; nfields > 0; nfields--) {
		// Cast: isspace() is only defined for unsigned char values and EOF.
		while (*str && isspace((unsigned char)*str)) str++;
		while (*str && !isspace((unsigned char)*str)) str++;
	}
	// Skip chars, stopping early at end of string
	while (*str && nchars-- > 0) str++;

	return str;
}

/*
 * Write one finished group of identical lines to f, represented by line.
 *
 * Honors -u / -d filtering based on TT.repeats, prefixes the occurrence
 * count for -c, and appends the NUL record terminator for -z. The text of
 * line is written as-is; no newline is added here.
 */
static void print_line(FILE *f, char *line)
{
	// -u hides lines that were repeated, -d hides lines that were not.
	if (toys.optflags & (TT.repeats ? FLAG_u : FLAG_d)) return;
	// TT.repeats is a signed long, so the conversion must be %ld, not %lu.
	if (toys.optflags & FLAG_c) fprintf(f, "%7ld ", TT.repeats + 1);
	fputs(line, f);
	// fputs() stops at the first NUL, so the -z terminator is written explicitly.
	if (toys.optflags & FLAG_z) fputc(0, f);
}

/*
 * Entry point: read records from the input (first argument, else stdin),
 * collapse runs of adjacent matching records, and write the result to the
 * output (second argument, else stdout).
 *
 * Records are delimited by '\n', or by NUL with -z. Two records match when
 * the parts selected by skip() compare equal, case-insensitively with -i and
 * limited to TT.maxchars characters when that is non-zero. The first record
 * of each run is the one kept and handed to print_line().
 */
void uniq_main(void)
{
	FILE *infile = stdin, *outfile = stdout;
	char *thisline = NULL, *prevline = NULL, *tmpline, eol = '\n';
	// Both sizes start at 0 so getdelim() never sees an indeterminate value.
	size_t thissize = 0, prevsize = 0, tmpsize;

	if (toys.optc >= 1) infile = xfopen(toys.optargs[0], "r");
	if (toys.optc >= 2) outfile = xfopen(toys.optargs[1], "w");

	if (toys.optflags & FLAG_z) eol = 0;

	// If first line can't be read there is nothing to output
	if (getdelim(&prevline, &prevsize, eol, infile) < 0) {
		// getdelim() may have allocated a buffer even though it failed.
		if (CFG_TOYBOX_FREE) free(prevline);
		return;
	}

	while (getdelim(&thisline, &thissize, eol, infile) > 0) {
		int diff;
		char *t1, *t2;

		// If requested get the chosen fields + character offsets.
		if (TT.nfields || TT.nchars) {
			t1 = skip(thisline);
			t2 = skip(prevline);
		} else {
			t1 = thisline;
			t2 = prevline;
		}

		// maxchars == 0 means no length limit on the comparison.
		if (TT.maxchars == 0) {
			diff = !(toys.optflags & FLAG_i)
			        ? strcmp(t1, t2)
			        : strcasecmp(t1, t2);
		} else {
			diff = !(toys.optflags & FLAG_i)
			        ? strncmp(t1, t2, TT.maxchars)
			        : strncasecmp(t1, t2, TT.maxchars);
		}

		if (diff == 0) { // same
			TT.repeats++;
		} else {
			// A new run starts: flush the previous one and reset the count.
			print_line(outfile, prevline);

			TT.repeats = 0;

			// Swap the two buffers (and their capacities) instead of
			// copying, so getdelim() reuses the old allocation next time.
			tmpline = prevline;
			prevline = thisline;
			thisline = tmpline;

			tmpsize = prevsize;
			prevsize = thissize;
			thissize = tmpsize;
		}
	}

	// Flush the final run.
	print_line(outfile, prevline);

	if (CFG_TOYBOX_FREE) {
		free(prevline);
		free(thisline);
	}
}