diff options
-rw-r--r-- | toys/posix/split.c | 108 |
1 files changed, 108 insertions, 0 deletions
diff --git a/toys/posix/split.c b/toys/posix/split.c new file mode 100644 index 00000000..f80527d9 --- /dev/null +++ b/toys/posix/split.c @@ -0,0 +1,108 @@ +/* split.c - split a file into smaller files + * + * Copyright 2013 Rob Landley <rob@landley.net> + * + * See http://pubs.opengroup.org/onlinepubs/9699919799/utilities/split.html + * + * Standard does not cover: + * - should splitting an empty file produce an empty outfile? (Went with "no".) + * - permissions on output file + +USE_SPLIT(NEWTOY(split, ">2a#<1=2>9b#<1l#<1", TOYFLAG_USR|TOYFLAG_BIN)) + +config SPLIT + bool "split" + default y + help + usage: split [-a SUFFIX_LEN] [-b BYTES] [-l LINES] [INPUT [OUTPUT]] + + Copy INPUT (or stdin) data to a series of OUTPUT (or "x") files with + alphabetically increasing suffix (aa, ab, ac... az, ba, bb...). + + -a Suffix length (default 2) + -b BYTES/file (10, 10k, 10m, 10g...) + -l LINES/file (default 1000) +*/ + +#define FOR_split +#include "toys.h" + +GLOBALS( + long lines; + long bytes; + long suflen; + + char *outfile; +) + +void do_split(int infd, char *in) +{ + unsigned long bytesleft, linesleft, filenum, len, pos; + int outfd = -1; + struct stat st; + + // posix doesn't cover permissions on output file, so copy input (or 0777) + st.st_mode = 0777; + fstat(infd, &st); + + len = pos = filenum = bytesleft = linesleft = 0; + for (;;) { + int i, j; + + // Refill toybuf? + if (len == pos) { + if (!(len = xread(infd, toybuf, sizeof(toybuf)))) break; + pos = 0; + } + + // Start new output file? + if ((TT.bytes && !bytesleft) || (TT.lines && !linesleft)) { + char *s = TT.outfile + strlen(TT.outfile); + + j = filenum++; + for (i = 0; i<TT.suflen; i++) { + *(--s) = 'a'+(j%26); + j /= 26; + } + if (j) error_exit("bad suffix"); + bytesleft = TT.bytes; + linesleft = TT.lines; + if (outfd != -1) close(outfd); + outfd = xcreate(TT.outfile, O_RDWR|O_CREAT|O_TRUNC, st.st_mode & 0777); + } + + // Write next chunk of output. + if (TT.lines) { + for (i = pos; i < len; ) { + if (toybuf[i++] == '\n' && !--linesleft) break; + if (!--bytesleft) break; + } + j = i - pos; + } else { + j = len - pos; + if (j > bytesleft) j = bytesleft; + bytesleft -= j; + } + xwrite(outfd, toybuf+pos, j); + pos += j; + } + + if (CFG_TOYBOX_FREE) { + if (outfd != -1) close(outfd); + if (infd) close(infd); + free(TT.outfile); + } + xexit(); +} + +void split_main(void) +{ + if (!TT.bytes && !TT.lines) TT.lines = 1000; + + // Allocate template for output filenames + TT.outfile = xmsprintf("%s% *c", (toys.optc == 2) ? toys.optargs[1] : "x", + (int)TT.suflen, ' '); + + // We only ever use one input, but this handles '-' or no input for us. + loopfiles(toys.optargs, do_split); +} |