diff options
author | Georgi Chorbadzhiyski <gf@unixsol.org> | 2012-03-14 22:04:06 -0500 |
---|---|---|
committer | Georgi Chorbadzhiyski <gf@unixsol.org> | 2012-03-14 22:04:06 -0500 |
commit | 16a29268033609664500b3333537cd62fcff50fb (patch) | |
tree | 7ea3dc14d794a07eadbefa169c82d0ccbd514e16 /toys | |
parent | b9bfb8731cb3bed41e55db8d70ffa045a25f63ae (diff) | |
download | toybox-16a29268033609664500b3333537cd62fcff50fb.tar.gz |
Implement uniq.
Diffstat (limited to 'toys')
-rw-r--r-- | toys/uniq.c | 154 |
1 files changed, 154 insertions, 0 deletions
diff --git a/toys/uniq.c b/toys/uniq.c new file mode 100644 index 00000000..03913e71 --- /dev/null +++ b/toys/uniq.c @@ -0,0 +1,154 @@ +/* vi: set sw=4 ts=4: + * + * uniq.c - report or filter out repeated lines in a file + * + * Copyright 2012 Georgi Chorbadzhiyski <georgi@unixsol.org> + * + * See http://www.opengroup.org/onlinepubs/009695399/utilities/uniq.html + +USE_UNIQ(NEWTOY(uniq, "f#s#w#zicdu", TOYFLAG_BIN)) + +config UNIQ + bool "uniq" + default y + help + usage: uniq [-cduiz] [-w maxchars] [-f fields] [-s char] [input_file [output_file]] + + Report or filter out repeated lines in a file + + -c show counts before each line + -d show only lines that are repeated + -u show only lines that are unique + -i ignore case when comparing lines + -z lines end with \0 not \n + -w compare maximum X chars per line + -f ignore first X fields + -s ignore first X chars +*/ + +#include "toys.h" + +DEFINE_GLOBALS( + long maxchars; + long nchars; + long nfields; + long repeats; +) + +#define TT this.uniq + +#define FLAG_z 16 +#define FLAG_i 8 +#define FLAG_c 4 +#define FLAG_d 2 +#define FLAG_u 1 + +static char *skip(char *str) +{ + int field = 0; + long nchars = TT.nchars; + long nfields = TT.nfields; + // Skip fields first + while (nfields && *str) { + if (isspace((unsigned char)*str)) { + if (field) { + field = 0; + nfields--; + } + } else if (!field) { + field = 1; + } + str++; + } + // Skip chars + while (nchars-- && *str) + str++; + return str; +} + +static void print_line(FILE *f, char *line) +{ + if (TT.repeats == 0 && (toys.optflags & FLAG_d)) + return; + if (TT.repeats > 0 && (toys.optflags & FLAG_u)) + return; + if ((toys.optflags & FLAG_c)) { + fprintf(f, "%7lu %s", TT.repeats + 1, line); + } else { + fprintf(f, "%s", line); + } + if (toys.optflags & FLAG_z) + fprintf(f, "%c", '\0'); +} + +void uniq_main(void) +{ + FILE *infile = stdin; + FILE *outfile = stdout; + char *thisline = NULL; + char *prevline = NULL; + size_t thissize, prevsize = 0; + char *tmpline; + char eol = '\n'; + size_t tmpsize; + + if (toys.optc >= 1) + infile = xfopen(toys.optargs[0], "r"); + + if (toys.optc >= 2) + outfile = xfopen(toys.optargs[1], "w"); + + if (toys.optflags & FLAG_z) + eol = '\0'; + + // If first line can't be read + if (getdelim(&prevline, &prevsize, eol, infile) < 0) + return; + + while (getdelim(&thisline, &thissize, eol, infile) > 0) { + int diff; + char *t1, *t2; + + // If requested get the chosen fields + character offsets. + if (TT.nfields || TT.nchars) { + t1 = skip(thisline); + t2 = skip(prevline); + } else { + t1 = thisline; + t2 = prevline; + } + + if (TT.maxchars == 0) { + diff = !(toys.optflags & FLAG_i) + ? strcmp(t1, t2) + : strcasecmp(t1, t2); + } else { + diff = !(toys.optflags & FLAG_i) + ? strncmp(t1, t2, TT.maxchars) + : strncasecmp(t1, t2, TT.maxchars); + } + + if (diff == 0) { // same + TT.repeats++; + } else { + print_line(outfile, prevline); + + TT.repeats = 0; + + tmpline = prevline; + prevline = thisline; + thisline = tmpline; + + tmpsize = prevsize; + prevsize = thissize; + thissize = tmpsize; + } + } + + print_line(outfile, prevline); + + if (CFG_TOYBOX_FREE) { + free(prevline); + free(thisline); + } +} |