From 2ba9c414e02197075e7e90ca077dda19fae75cda Mon Sep 17 00:00:00 2001 From: Rob Landley Date: Mon, 20 Mar 2017 11:11:34 -0500 Subject: Rewrite paste so it actually works. --- toys/posix/paste.c | 173 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 103 insertions(+), 70 deletions(-) (limited to 'toys/posix/paste.c') diff --git a/toys/posix/paste.c b/toys/posix/paste.c index 5ab3448d..508b3518 100644 --- a/toys/posix/paste.c +++ b/toys/posix/paste.c @@ -1,99 +1,132 @@ -/* paste.c - Replace newlines +/* paste.c - Merge corresponding lines * * Copyright 2012 Felix Janda * * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/paste.html * -USE_PASTE(NEWTOY(paste, "d:s", TOYFLAG_BIN)) + * Deviations from posix: the FILE argument isn't mandatory, none == '-' + +USE_PASTE(NEWTOY(paste, "d:s", TOYFLAG_BIN|TOYFLAG_LOCALE)) config PASTE bool "paste" default y help - usage: paste [-s] [-d list] [file...] - - Replace newlines in files. + usage: paste [-s] [-d DELIMITERS] [FILE...] - -d list list of delimiters to separate lines - -s process files sequentially instead of in parallel + Merge corresponding lines from each input file. - By default print corresponding lines separated by . + -d list of delimiter characters to separate fields with (default is \t) + -s sequential mode: turn each input file into one line of output */ + #define FOR_paste #include "toys.h" GLOBALS( - char *delim; + char *d; + + int files; ) -void paste_main(void) +// \0 is weird, and -d "" is also weird. + +static void paste_files(void) { - char *p, *buf = toybuf, **args = toys.optargs; - size_t ndelim = 0; - int i, j, c; - - // Process delimiter list - // TODO: Handle multibyte characters - if (!(toys.optflags & FLAG_d)) TT.delim = "\t"; - for (p = TT.delim; *p; p++, buf++, ndelim++) { - if (*p == '\\') { - p++; - if (-1 == (i = stridx("nt\\0", *p))) - error_exit("bad delimiter: \\%c", *p); - *buf = "\n\t\\\0"[i]; - } else *buf = *p; - } - *buf = 0; - - if (toys.optflags & FLAG_s) { // Sequential - FILE *f; - - for (; *args; args++) { - if ((*args)[0] == '-' && !(*args)[1]) f = stdin; - else if (!(f = fopen(*args, "r"))) perror_exit_raw(*args); - for (i = 0, c = 0; c != EOF;) { - switch(c = getc(f)) { - case '\n': - putchar(toybuf[i++ % ndelim]); - case EOF: - break; - default: - putchar(c); - } + FILE **fps = (void *)toybuf; + char *dpos, *dstr, *buf, c; + int i, any, dcount, dlen, len, seq = toys.optflags&FLAG_s; + + // Loop through lines until no input left + for (;;) { + + // Start of each line/file resets delimiter cycle + dpos = TT.d; + mbtowc(0, 0, 0); + + for (i = any = dcount = dlen = 0; seq || i=(len = getline(&buf, &blen, ff))) { + if (ff && ff!=stdin) fclose(ff); + if (seq) return; + fps[i] = 0; + if (!any) continue; } - if (f != stdin) fclose(f); - putchar('\n'); - } - } else { // Parallel - // Need to be careful not to print an extra line at the end - FILE **files; - int anyopen = 1; - - files = (FILE**)(buf + 1); - for (; *args; args++, files++) { - if ((*args)[0] == '-' && !(*args)[1]) *files = stdin; - else if (!(*files = fopen(*args, "r"))) perror_exit_raw(*args); - } - while (anyopen) { - anyopen = 0; - for (i = 0; i < toys.optc; i++) { - FILE **f = (FILE**)(buf + 1) + i; - - if (*f) for (;;) { - c = getc(*f); - if (c != EOF) { - if (!anyopen++) for (j = 0; j < i; j++) putchar(toybuf[j % ndelim]); - if (c != '\n') putchar(c); - else break; - } + dcount = any ? 1 : i; + any = 1; + + // Output delimiters as necessary: not at beginning/end of line, + // catch up if first few files had no input but a later one did. + // Entire line with no input means no output. + + while (dcount) { + + // Find next delimiter, which can be "", \n, or UTF8 w/combining chars + dstr = dpos; + dlen = 0; + dcount--; + + if (!*TT.d) {;} + else if (*dpos == '\\') { + if (*++dpos=='0') dpos++; else { - if (*f != stdin) fclose(*f); - *f = 0; + dlen = 1; + if ((c = unescape(*dpos))) { + dstr = &c; + dpos++; + } + } + dpos++; + } else { + while (0<(dlen = mbtowc(&wc, dpos, 99))) { + dpos += dlen; + if (!(dlen = wcwidth(wc))) continue; + if (dlen<0) dpos = dstr+1; break; } + dlen = dpos-dstr; } - if (anyopen) putchar((i + 1 == toys.optc) ? toybuf[i % ndelim] : '\n'); + if (!*dpos) dpos = TT.d; + + if (dlen) fwrite(dstr, dlen, 1, stdout); + } + + if (0= sizeof(toybuf)/sizeof(FILE *)) perror_exit("tilt"); + if (toys.optflags&FLAG_s) { + paste_files(); + xputc('\n'); + TT.files = 0; + } +} + +void paste_main(void) +{ + if (!(toys.optflags&FLAG_d)) TT.d = "\t"; + + loopfiles_rw(toys.optargs, O_RDONLY, 0, do_paste); + if (!(toys.optflags&FLAG_s)) paste_files(); +} -- cgit v1.2.3