diff options
Diffstat (limited to 'toys/posix/sort.c')
-rw-r--r-- | toys/posix/sort.c | 591 |
1 files changed, 294 insertions, 297 deletions
diff --git a/toys/posix/sort.c b/toys/posix/sort.c index 8f26f5d3..41b020b9 100644 --- a/toys/posix/sort.c +++ b/toys/posix/sort.c @@ -1,6 +1,4 @@ -/* vi: set sw=4 ts=4: - * - * sort.c - put input lines into order +/* sort.c - put input lines into order * * Copyright 2004, 2008 Rob Landley <rob@landley.net> * @@ -9,54 +7,54 @@ USE_SORT(NEWTOY(sort, USE_SORT_FLOAT("g")USE_SORT_BIG("S:T:m" "o:k*t:xbMcszdfi") "run", TOYFLAG_USR|TOYFLAG_BIN)) config SORT - bool "sort" - default y - help - usage: sort [-run] [FILE...] + bool "sort" + default y + help + usage: sort [-run] [FILE...] - Sort all lines of text from input files (or stdin) to stdout. + Sort all lines of text from input files (or stdin) to stdout. - -r reverse - -u unique lines only - -n numeric order (instead of alphabetical) + -r reverse + -u unique lines only + -n numeric order (instead of alphabetical) config SORT_BIG - bool "SuSv3 options (Support -ktcsbdfiozM)" - default y - depends on SORT - help - usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE] - - -b ignore leading blanks (or trailing blanks in second part of key) - -c check whether input is sorted - -d dictionary order (use alphanumeric and whitespace chars only) - -f force uppercase (case insensitive sort) - -i ignore nonprinting characters - -M month sort (jan, feb, etc). - -x Hexadecimal numerical sort - -s skip fallback sort (only sort with keys) - -z zero (null) terminated input - -k sort by "key" (see below) - -t use a key separator other than whitespace - -o output to FILE instead of stdout - - Sorting by key looks at a subset of the words on each line. -k2 - uses the second word to the end of the line, -k2,2 looks at only - the second word, -k2,4 looks from the start of the second to the end - of the fourth word. Specifying multiple keys uses the later keys as - tie breakers, in order. A type specifier appended to a sort key - (such as -2,2n) applies only to sorting that key. 
+ bool "SuSv3 options (Support -ktcsbdfiozM)" + default y + depends on SORT + help + usage: sort [-bcdfiMsz] [-k#[,#[x]] [-t X]] [-o FILE] + + -b ignore leading blanks (or trailing blanks in second part of key) + -c check whether input is sorted + -d dictionary order (use alphanumeric and whitespace chars only) + -f force uppercase (case insensitive sort) + -i ignore nonprinting characters + -M month sort (jan, feb, etc). + -x Hexadecimal numerical sort + -s skip fallback sort (only sort with keys) + -z zero (null) terminated input + -k sort by "key" (see below) + -t use a key separator other than whitespace + -o output to FILE instead of stdout + + Sorting by key looks at a subset of the words on each line. -k2 + uses the second word to the end of the line, -k2,2 looks at only + the second word, -k2,4 looks from the start of the second to the end + of the fourth word. Specifying multiple keys uses the later keys as + tie breakers, in order. A type specifier appended to a sort key + (such as -k2,2n) applies only to sorting that key. config SORT_FLOAT - bool "Floating point (-g)" - default y - depends on SORT_BIG - help - usage: sort [-g] + bool "Floating point (-g)" + default y + depends on SORT_BIG + help + usage: sort [-g] - This version of sort requires floating point. + This version of sort requires floating point. - -g general numeric sort (double precision with nan and inf) + -g general numeric sort (double precision with nan and inf) */ @@ -64,14 +62,14 @@ config SORT_FLOAT #include "toys.h" GLOBALS( - char *key_separator; - struct arg_list *raw_keys; - char *outfile; - char *ignore1, ignore2; // GNU compatability NOPs for -S and -T. - - void *key_list; - int linecount; - char **lines; + char *key_separator; + struct arg_list *raw_keys; + char *outfile; + char *ignore1, ignore2; // GNU compatibility NOPs for -S and -T. + + void *key_list; + int linecount; + char **lines; ) // The sort types are n, g, and M. 
@@ -83,321 +81,320 @@ GLOBALS( struct sort_key { - struct sort_key *next_key; // linked list - unsigned range[4]; // start word, start char, end word, end char - int flags; + struct sort_key *next_key; // linked list + unsigned range[4]; // start word, start char, end word, end char + int flags; }; // Copy of the part of this string corresponding to a key/flags. static char *get_key_data(char *str, struct sort_key *key, int flags) { - int start=0, end, len, i, j; + int start=0, end, len, i, j; - // Special case whole string, so we don't have to make a copy + // Special case whole string, so we don't have to make a copy - if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] - && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; + if(key->range[0]==1 && !key->range[1] && !key->range[2] && !key->range[3] + && !(flags&(FLAG_b&FLAG_d&FLAG_f&FLAG_i&FLAG_bb))) return str; - // Find start of key on first pass, end on second pass + // Find start of key on first pass, end on second pass - len = strlen(str); - for (j=0; j<2; j++) { - if (!key->range[2*j]) end=len; + len = strlen(str); + for (j=0; j<2; j++) { + if (!key->range[2*j]) end=len; - // Loop through fields - else { - end=0; - for (i=1; i < key->range[2*j]+j; i++) { + // Loop through fields + else { + end=0; + for (i=1; i < key->range[2*j]+j; i++) { - // Skip leading blanks - if (str[end] && !TT.key_separator) - while (isspace(str[end])) end++; + // Skip leading blanks + if (str[end] && !TT.key_separator) + while (isspace(str[end])) end++; - // Skip body of key - for (; str[end]; end++) { - if (TT.key_separator) { - if (str[end]==*TT.key_separator) break; - } else if (isspace(str[end])) break; - } - } + // Skip body of key + for (; str[end]; end++) { + if (TT.key_separator) { + if (str[end]==*TT.key_separator) break; + } else if (isspace(str[end])) break; } - if (!j) start=end; - } - - // Key with explicit separator starts after the separator - if (TT.key_separator && 
str[start]==*TT.key_separator) start++; - - // Strip leading and trailing whitespace if necessary - if (flags&FLAG_b) while (isspace(str[start])) start++; - if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; - - // Handle offsets on start and end - if (key->range[3]) { - end += key->range[3]-1; - if (end>len) end=len; - } - if (key->range[1]) { - start += key->range[1]-1; - if (start>len) start=len; - } - - // Make the copy - if (end<start) end=start; - str = xstrndup(str+start, end-start); - - // Handle -d - if (flags&FLAG_d) { - for (start = end = 0; str[end]; end++) - if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; - str[start] = 0; - } - - // Handle -i - if (flags&FLAG_i) { - for (start = end = 0; str[end]; end++) - if (isprint(str[end])) str[start++] = str[end]; - str[start] = 0; + } } - - // Handle -f - if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]); - - return str; + if (!j) start=end; + } + + // Key with explicit separator starts after the separator + if (TT.key_separator && str[start]==*TT.key_separator) start++; + + // Strip leading and trailing whitespace if necessary + if (flags&FLAG_b) while (isspace(str[start])) start++; + if (flags&FLAG_bb) while (end>start && isspace(str[end-1])) end--; + + // Handle offsets on start and end + if (key->range[3]) { + end += key->range[3]-1; + if (end>len) end=len; + } + if (key->range[1]) { + start += key->range[1]-1; + if (start>len) start=len; + } + + // Make the copy + if (end<start) end=start; + str = xstrndup(str+start, end-start); + + // Handle -d + if (flags&FLAG_d) { + for (start = end = 0; str[end]; end++) + if (isspace(str[end]) || isalnum(str[end])) str[start++] = str[end]; + str[start] = 0; + } + + // Handle -i + if (flags&FLAG_i) { + for (start = end = 0; str[end]; end++) + if (isprint(str[end])) str[start++] = str[end]; + str[start] = 0; + } + + // Handle -f + if (flags*FLAG_f) for(i=0; str[i]; i++) str[i] = toupper(str[i]); + + return str; } // 
append a sort_key to key_list. static struct sort_key *add_key(void) { - void **stupid_compiler = &TT.key_list; - struct sort_key **pkey = (struct sort_key **)stupid_compiler; + void **stupid_compiler = &TT.key_list; + struct sort_key **pkey = (struct sort_key **)stupid_compiler; - while (*pkey) pkey = &((*pkey)->next_key); - return *pkey = xzalloc(sizeof(struct sort_key)); + while (*pkey) pkey = &((*pkey)->next_key); + return *pkey = xzalloc(sizeof(struct sort_key)); } // Perform actual comparison static int compare_values(int flags, char *x, char *y) { - int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x); - - // Ascii sort - if (!ff) return strcmp(x, y); + int ff = flags & (FLAG_n|FLAG_g|FLAG_M|FLAG_x); - if (CFG_SORT_FLOAT && ff == FLAG_g) { - char *xx,*yy; - double dx = strtod(x,&xx), dy = strtod(y,&yy); - int xinf, yinf; + // Ascii sort + if (!ff) return strcmp(x, y); - // not numbers < NaN < -infinity < numbers < +infinity + if (CFG_SORT_FLOAT && ff == FLAG_g) { + char *xx,*yy; + double dx = strtod(x,&xx), dy = strtod(y,&yy); + int xinf, yinf; - if (x==xx) return y==yy ? 0 : -1; - if (y==yy) return 1; + // not numbers < NaN < -infinity < numbers < +infinity - // Check for isnan - if (dx!=dx) return (dy!=dy) ? 0 : -1; - if (dy!=dy) return 1; + if (x==xx) return y==yy ? 0 : -1; + if (y==yy) return 1; - // Check for infinity. (Could underflow, but avoids needing libm.) - xinf = (1.0/dx == 0.0); - yinf = (1.0/dy == 0.0); - if (xinf) { - if(dx<0) return (yinf && dy<0) ? 0 : -1; - return (yinf && dy>0) ? 0 : 1; - } - if (yinf) return dy<0 ? 1 : -1; - - return dx>dy ? 1 : (dx<dy ? -1 : 0); - } else if (CFG_SORT_BIG && ff == FLAG_M) { - struct tm thyme; - int dx; - char *xx,*yy; - - xx = strptime(x,"%b",&thyme); - dx = thyme.tm_mon; - yy = strptime(y,"%b",&thyme); - if (!xx) return !yy ? 0 : -1; - else if (!yy) return 1; - else return dx==thyme.tm_mon ? 
0 : dx-thyme.tm_mon; - - } else if (CFG_SORT_BIG && ff == FLAG_x) { - return strtol(x, NULL, 16)-strtol(y, NULL, 16); - // This has to be ff == FLAG_n - } else { - // Full floating point version of -n - if (CFG_SORT_FLOAT) { - double dx = atof(x), dy = atof(y); + // Check for isnan + if (dx!=dx) return (dy!=dy) ? 0 : -1; + if (dy!=dy) return 1; - return dx>dy ? 1 : (dx<dy ? -1 : 0); - // Integer version of -n for tiny systems - } else return atoi(x)-atoi(y); + // Check for infinity. (Could underflow, but avoids needing libm.) + xinf = (1.0/dx == 0.0); + yinf = (1.0/dy == 0.0); + if (xinf) { + if(dx<0) return (yinf && dy<0) ? 0 : -1; + return (yinf && dy>0) ? 0 : 1; } + if (yinf) return dy<0 ? 1 : -1; + + return dx>dy ? 1 : (dx<dy ? -1 : 0); + } else if (CFG_SORT_BIG && ff == FLAG_M) { + struct tm thyme; + int dx; + char *xx,*yy; + + xx = strptime(x,"%b",&thyme); + dx = thyme.tm_mon; + yy = strptime(y,"%b",&thyme); + if (!xx) return !yy ? 0 : -1; + else if (!yy) return 1; + else return dx==thyme.tm_mon ? 0 : dx-thyme.tm_mon; + + } else if (CFG_SORT_BIG && ff == FLAG_x) { + return strtol(x, NULL, 16)-strtol(y, NULL, 16); + // This has to be ff == FLAG_n + } else { + // Full floating point version of -n + if (CFG_SORT_FLOAT) { + double dx = atof(x), dy = atof(y); + + return dx>dy ? 1 : (dx<dy ? -1 : 0); + // Integer version of -n for tiny systems + } else return atoi(x)-atoi(y); + } } - // Callback from qsort(): Iterate through key_list and perform comparisons. static int compare_keys(const void *xarg, const void *yarg) { - int flags = toys.optflags, retval = 0; - char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; - struct sort_key *key; - - if (CFG_SORT_BIG) { - for (key=(struct sort_key *)TT.key_list; !retval && key; - key = key->next_key) - { - flags = key->flags ? 
key->flags : toys.optflags; + int flags = toys.optflags, retval = 0; + char *x, *y, *xx = *(char **)xarg, *yy = *(char **)yarg; + struct sort_key *key; - // Chop out and modify key chunks, handling -dfib + if (CFG_SORT_BIG) { + for (key=(struct sort_key *)TT.key_list; !retval && key; + key = key->next_key) + { + flags = key->flags ? key->flags : toys.optflags; - x = get_key_data(xx, key, flags); - y = get_key_data(yy, key, flags); + // Chop out and modify key chunks, handling -dfib - retval = compare_values(flags, x, y); + x = get_key_data(xx, key, flags); + y = get_key_data(yy, key, flags); - // Free the copies get_key_data() made. + retval = compare_values(flags, x, y); - if (x != xx) free(x); - if (y != yy) free(y); + // Free the copies get_key_data() made. - if (retval) break; - } - } else retval = compare_values(flags, xx, yy); + if (x != xx) free(x); + if (y != yy) free(y); - // Perform fallback sort if necessary - if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) { - retval = strcmp(xx, yy); - flags = toys.optflags; + if (retval) break; } + } else retval = compare_values(flags, xx, yy); + + // Perform fallback sort if necessary + if (!retval && !(CFG_SORT_BIG && (toys.optflags&FLAG_s))) { + retval = strcmp(xx, yy); + flags = toys.optflags; + } - return retval * ((flags&FLAG_r) ? -1 : 1); + return retval * ((flags&FLAG_r) ? -1 : 1); } // Callback from loopfiles to handle input files. static void sort_read(int fd, char *name) { - // Read each line from file, appending to a big array. - - for (;;) { - char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z)) - ? get_rawline(fd, NULL, 0) : get_line(fd); - - if (!line) break; - - // handle -c here so we don't allocate more memory than necessary. - if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) { - int j = (toys.optflags&FLAG_u) ? 
-1 : 0; - - if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) - error_exit("%s: Check line %d\n", name, TT.linecount); - free(TT.lines); - TT.lines = (char **)line; - } else { - if (!(TT.linecount&63)) - TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); - TT.lines[TT.linecount] = line; - } - TT.linecount++; + // Read each line from file, appending to a big array. + + for (;;) { + char * line = (CFG_SORT_BIG && (toys.optflags&FLAG_z)) + ? get_rawline(fd, NULL, 0) : get_line(fd); + + if (!line) break; + + // handle -c here so we don't allocate more memory than necessary. + if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) { + int j = (toys.optflags&FLAG_u) ? -1 : 0; + + if (TT.lines && compare_keys((void *)&TT.lines, &line)>j) + error_exit("%s: Check line %d\n", name, TT.linecount); + free(TT.lines); + TT.lines = (char **)line; + } else { + if (!(TT.linecount&63)) + TT.lines = xrealloc(TT.lines, sizeof(char *)*(TT.linecount+64)); + TT.lines[TT.linecount] = line; } + TT.linecount++; + } } void sort_main(void) { - int idx, fd = 1; - - // Open output file if necessary. - if (CFG_SORT_BIG && TT.outfile) - fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666); - - // Parse -k sort keys. - if (CFG_SORT_BIG && TT.raw_keys) { - struct arg_list *arg; - - for (arg = TT.raw_keys; arg; arg = arg->next) { - struct sort_key *key = add_key(); - char *temp; - int flag; - - idx = 0; - temp = arg->arg; - while (*temp) { - // Start of range - key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); - if (*temp=='.') - key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); - - // Handle flags appended to a key type. - for (;*temp;temp++) { - char *temp2, *optlist; - - // Note that a second comma becomes an "Unknown key" error. - - if (*temp==',' && !idx++) { - temp++; - break; - } - - // Which flag is this? 
- - optlist = toys.which->options; - temp2 = strchr(optlist, *temp); - flag = (1<<(optlist-temp2+strlen(optlist)-1)); - - // Was it a flag that can apply to a key? - - if (!temp2 || flag>FLAG_b - || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) - { - error_exit("Unknown key option."); - } - // b after , means strip _trailing_ space, not leading. - if (idx && flag==FLAG_b) flag = FLAG_bb; - key->flags |= flag; - } - } + int idx, fd = 1; + + // Open output file if necessary. + if (CFG_SORT_BIG && TT.outfile) + fd = xcreate(TT.outfile, O_CREAT|O_TRUNC|O_WRONLY, 0666); + + // Parse -k sort keys. + if (CFG_SORT_BIG && TT.raw_keys) { + struct arg_list *arg; + + for (arg = TT.raw_keys; arg; arg = arg->next) { + struct sort_key *key = add_key(); + char *temp; + int flag; + + idx = 0; + temp = arg->arg; + while (*temp) { + // Start of range + key->range[2*idx] = (unsigned)strtol(temp, &temp, 10); + if (*temp=='.') + key->range[(2*idx)+1] = (unsigned)strtol(temp+1, &temp, 10); + + // Handle flags appended to a key type. + for (;*temp;temp++) { + char *temp2, *optlist; + + // Note that a second comma becomes an "Unknown key" error. + + if (*temp==',' && !idx++) { + temp++; + break; + } + + // Which flag is this? + + optlist = toys.which->options; + temp2 = strchr(optlist, *temp); + flag = (1<<(optlist-temp2+strlen(optlist)-1)); + + // Was it a flag that can apply to a key? + + if (!temp2 || flag>FLAG_b + || (flag&(FLAG_u|FLAG_c|FLAG_s|FLAG_z))) + { + error_exit("Unknown key option."); + } + // b after , means strip _trailing_ space, not leading. + if (idx && flag==FLAG_b) flag = FLAG_bb; + key->flags |= flag; } + } } + } - // global b flag strips both leading and trailing spaces - if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb; + // global b flag strips both leading and trailing spaces + if (toys.optflags&FLAG_b) toys.optflags |= FLAG_bb; - // If no keys, perform alphabetic sort over the whole line. 
- if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1; + // If no keys, perform alphabetic sort over the whole line. + if (CFG_SORT_BIG && !TT.key_list) add_key()->range[0] = 1; - // Open input files and read data, populating TT.lines[TT.linecount] - loopfiles(toys.optargs, sort_read); + // Open input files and read data, populating TT.lines[TT.linecount] + loopfiles(toys.optargs, sort_read); - // The compare (-c) logic was handled in sort_read(), - // so if we got here, we're done. - if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now; + // The compare (-c) logic was handled in sort_read(), + // so if we got here, we're done. + if (CFG_SORT_BIG && (toys.optflags&FLAG_c)) goto exit_now; - // Perform the actual sort - qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); + // Perform the actual sort + qsort(TT.lines, TT.linecount, sizeof(char *), compare_keys); - // handle unique (-u) - if (toys.optflags&FLAG_u) { - int jdx; + // handle unique (-u) + if (toys.optflags&FLAG_u) { + int jdx; - for (jdx=0, idx=1; idx<TT.linecount; idx++) { - if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) - free(TT.lines[idx]); - else TT.lines[++jdx] = TT.lines[idx]; - } - if (TT.linecount) TT.linecount = jdx+1; + for (jdx=0, idx=1; idx<TT.linecount; idx++) { + if (!compare_keys(&TT.lines[jdx], &TT.lines[idx])) + free(TT.lines[idx]); + else TT.lines[++jdx] = TT.lines[idx]; } + if (TT.linecount) TT.linecount = jdx+1; + } - // Output result - for (idx = 0; idx<TT.linecount; idx++) { - char *s = TT.lines[idx]; - xwrite(fd, s, strlen(s)); - if (CFG_TOYBOX_FREE) free(s); - xwrite(fd, "\n", 1); - } + // Output result + for (idx = 0; idx<TT.linecount; idx++) { + char *s = TT.lines[idx]; + xwrite(fd, s, strlen(s)); + if (CFG_TOYBOX_FREE) free(s); + xwrite(fd, "\n", 1); + } exit_now: - if (CFG_TOYBOX_FREE) { - if (fd != 1) close(fd); - free(TT.lines); - } + if (CFG_TOYBOX_FREE) { + if (fd != 1) close(fd); + free(TT.lines); + } } |