aboutsummaryrefslogtreecommitdiff
path: root/toys
diff options
context:
space:
mode:
authorGeorgi Chorbadzhiyski <gf@unixsol.org>2012-03-14 22:04:06 -0500
committerGeorgi Chorbadzhiyski <gf@unixsol.org>2012-03-14 22:04:06 -0500
commit16a29268033609664500b3333537cd62fcff50fb (patch)
tree7ea3dc14d794a07eadbefa169c82d0ccbd514e16 /toys
parentb9bfb8731cb3bed41e55db8d70ffa045a25f63ae (diff)
downloadtoybox-16a29268033609664500b3333537cd62fcff50fb.tar.gz
Implement uniq.
Diffstat (limited to 'toys')
-rw-r--r--toys/uniq.c154
1 files changed, 154 insertions, 0 deletions
diff --git a/toys/uniq.c b/toys/uniq.c
new file mode 100644
index 00000000..03913e71
--- /dev/null
+++ b/toys/uniq.c
@@ -0,0 +1,154 @@
+/* vi: set sw=4 ts=4:
+ *
+ * uniq.c - report or filter out repeated lines in a file
+ *
+ * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org>
+ *
+ * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html
+
+USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN))
+
+config UNIQ
+ bool "uniq"
+ default y
+ help
+ usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]]
+
+ Report or filter out repeated lines in a file
+
+ -c show counts before each line
+ -d show only lines that are repeated
+ -u show only lines that are unique
+ -i ignore case when comparing lines
+ -z lines end with \0 not \n
+ -w compare maximum X chars per line
+ -f ignore first X fields
+ -s ignore first X chars
+*/
+
+#include "toys.h"
+
+DEFINE_GLOBALS(
+ long maxchars;
+ long nchars;
+ long nfields;
+ long repeats;
+)
+
+#define TT this.uniq
+
+#define FLAG_z 16
+#define FLAG_i 8
+#define FLAG_c 4
+#define FLAG_d 2
+#define FLAG_u 1
+
+static char *skip(char *str)
+{
+ int field = 0;
+ long nchars = TT.nchars;
+ long nfields = TT.nfields;
+ // Skip fields first
+ while (nfields && *str) {
+ if (isspace((unsigned char)*str)) {
+ if (field) {
+ field = 0;
+ nfields--;
+ }
+ } else if (!field) {
+ field = 1;
+ }
+ str++;
+ }
+ // Skip chars
+ while (nchars-- && *str)
+ str++;
+ return str;
+}
+
+static void print_line(FILE *f, char *line)
+{
+ if (TT.repeats == 0 && (toys.optflags & FLAG_d))
+ return;
+ if (TT.repeats > 0 && (toys.optflags & FLAG_u))
+ return;
+ if ((toys.optflags & FLAG_c)) {
+ fprintf(f, "%7lu %s", TT.repeats + 1, line);
+ } else {
+ fprintf(f, "%s", line);
+ }
+ if (toys.optflags & FLAG_z)
+ fprintf(f, "%c", '\0');
+}
+
+void uniq_main(void)
+{
+ FILE *infile = stdin;
+ FILE *outfile = stdout;
+ char *thisline = NULL;
+ char *prevline = NULL;
+ size_t thissize, prevsize = 0;
+ char *tmpline;
+ char eol = '\n';
+ size_t tmpsize;
+
+ if (toys.optc >= 1)
+ infile = xfopen(toys.optargs[0], "r");
+
+ if (toys.optc >= 2)
+ outfile = xfopen(toys.optargs[1], "w");
+
+ if (toys.optflags & FLAG_z)
+ eol = '\0';
+
+ // If first line can't be read
+ if (getdelim(&prevline, &prevsize, eol, infile) < 0)
+ return;
+
+ while (getdelim(&thisline, &thissize, eol, infile) > 0) {
+ int diff;
+ char *t1, *t2;
+
+ // If requested get the chosen fields + character offsets.
+ if (TT.nfields || TT.nchars) {
+ t1 = skip(thisline);
+ t2 = skip(prevline);
+ } else {
+ t1 = thisline;
+ t2 = prevline;
+ }
+
+ if (TT.maxchars == 0) {
+ diff = !(toys.optflags & FLAG_i)
+ ? strcmp(t1, t2)
+ : strcasecmp(t1, t2);
+ } else {
+ diff = !(toys.optflags & FLAG_i)
+ ? strncmp(t1, t2, TT.maxchars)
+ : strncasecmp(t1, t2, TT.maxchars);
+ }
+
+ if (diff == 0) { // same
+ TT.repeats++;
+ } else {
+ print_line(outfile, prevline);
+
+ TT.repeats = 0;
+
+ tmpline = prevline;
+ prevline = thisline;
+ thisline = tmpline;
+
+ tmpsize = prevsize;
+ prevsize = thissize;
+ thissize = tmpsize;
+ }
+ }
+
+ print_line(outfile, prevline);
+
+ if (CFG_TOYBOX_FREE) {
+ free(prevline);
+ free(thisline);
+ }
+}