From 931425ca05b93348dd497598af077d16cdc1cd3c Mon Sep 17 00:00:00 2001 From: Isaac Dunham Date: Sat, 12 Apr 2014 17:26:44 -0500 Subject: roadmap: describe glibc commands. Some glibc commands are irrelevant because they're for functionality that is excluded from musl (mtrace, rpc*, localedef, iconvconfig, nscd). getconf and catchsegv look like candidates for the development toolchain; locale and iconv were already triaged. getent is pretty lame, but it and the timezone stuff (tzselect zic zdump) are the only new possibly interesting commands. --- toys/pending/compress.c | 189 ++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 157 insertions(+), 32 deletions(-) (limited to 'toys/pending/compress.c') diff --git a/toys/pending/compress.c b/toys/pending/compress.c index bb4af46c..48cec494 100644 --- a/toys/pending/compress.c +++ b/toys/pending/compress.c @@ -5,7 +5,8 @@ * The inflate/deflate code lives here, so the various things that use it * either live here or call these commands to pipe data through them. * - * Divergence from posix: replace obsolete "compress" with mutiplexer. + * Divergence from posix: replace obsolete/patented "compress" with mutiplexer. + * (gzip already replaces "uncompress".) * * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip) * LSB 4.1 has gzip, gunzip, and zcat @@ -14,47 +15,125 @@ // Accept many different kinds of command line argument. // Leave Lrg at end so flag values line up. -USE_COMPRESS(NEWTOY(compress, "zcd9Lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN)) -USE_COMPRESS(NEWTOY(zcat, "aLrg[!aLrg]", TOYFLAG_USR|TOYFLAG_BIN)) +USE_COMPRESS(NEWTOY(compress, "zcd9lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN)) +USE_COMPRESS(NEWTOY(gzip, USE_GZIP_D("d")"19dcflqStvgLRz[!gLRz]", TOYFLAG_USR|TOYFLAG_BIN)) +USE_DECOMPRESS(NEWTOY(zcat, 0, TOYFLAG_USR|TOYFLAG_BIN)) +USE_DECOMPRESS(NEWTOY(gunzip, "cflqStv", TOYFLAG_USR|TOYFLAG_BIN)) //zip unzip gzip gunzip zcat config COMPRESS bool "compress" default n + help + usage: compress [-zgLR19] [FILE] + + Compress or decompress file (or stdin) using "deflate" algorithm. + + -1 min compression + -9 max compression (default) + -g gzip (default) + -L zlib + -R raw + -z zip + +config GZIP + bool "gzip" + default y + depends on COMPRESS + help + usage: gzip [-19cfqStvzgLR] [FILE...] + + Compess (deflate) file(s). With no files, compress stdin to stdout. + + On successful decompression, compressed files are replaced with the + uncompressed version. The input file is removed and replaced with + a new file without the .gz extension (with same ownership/permissions). + + -1 Minimal compression (fastest) + -9 Max compression (default) + -c cat to stdout (act as zcat) + -f force (if output file exists, input is tty, unrecognized extension) + -q quiet (no warnings) + -S specify exension (default .*) + -t test compressed file(s) + -v verbose (like -l, but compress files) + + Compression type: + -g gzip (default) -L zlib -R raw -z zip + +config GZIP_D + bool + default y + depends on GZIP && DECOMPRESS + help + usage: gzip [-d] + + -d decompress (act as gunzip) + +config DECOMPRESS + bool "decompress" + default n help usage: compress [-zglrcd9] [FILE] Compress or decompress file (or stdin) using "deflate" algorithm. - -c compress with -g gzip (default) -L zlib -r raw -z zip + -c compress with -g gzip (default) -l zlib -r raw -z zip -d decompress (autodetects type) + config ZCAT bool "zcat" - default n - depends on COMPRESS + default y + depends on DECOMPRESS help usage: zcat [FILE...] Decompress deflated file(s) to stdout + +config GUNZIP + bool "gunzip" + default y + depends on DECOMPRESS + help + usage: gunzip [-cflqStv] [FILE...] + + Decompess (deflate) file(s). With no files, compress stdin to stdout. + + On successful decompression, compressed files are replaced with the + uncompressed version. The input file is removed and replaced with + a new file without the .gz extension (with same ownership/permissions). + + -c cat to stdout (act as zcat) + -f force (output file exists, input is tty, unrecognized extension) + -l list compressed/uncompressed/ratio/name for each input file. + -q quiet (no warnings) + -S specify exension (default .*) + -t test compressed file(s) + -v verbose (like -l, but decompress files) */ #define FOR_compress #include "toys.h" GLOBALS( - // base offset and extra bits tables (length and distance) + // Huffman codes: base offset and extra bits tables (length and distance) char lenbits[29], distbits[30]; unsigned short lenbase[29], distbase[30]; void *fixdisthuff, *fixlithuff; + // CRC void (*crcfunc)(char *data, int len); - unsigned crc, len; + unsigned crc; + + // Compressed data buffer + char *data; + unsigned pos, len; + int fd; - char *outbuf; - unsigned outlen; - int outfd; + // Tables only used for deflation + unsigned short *head, *chain; ) // little endian bit buffer @@ -125,12 +204,12 @@ unsigned bitbuf_get(struct bitbuf *bb, int bits) return result; } -static void outbuf_crc(char sym) +static void data_crc(char sym) { - TT.outbuf[TT.outlen++ & 32767] = sym; + TT.data[TT.pos++ & 32767] = sym; - if (!(TT.outlen & 32767)) { - xwrite(TT.outfd, TT.outbuf, 32768); + if (!(TT.pos & 32767)) { + xwrite(TT.fd, TT.data, 32768); if (TT.crcfunc) TT.crcfunc(0, 32768); } } @@ -146,7 +225,7 @@ struct huff { // Create simple huffman tree from array of bit lengths. -// The symbols in deflate's huffman trees are sorted (first by bit length +// The symbols in the huffman trees are sorted (first by bit length // of the code to reach them, then by symbol number). This means that given // the bit length of each symbol, we can construct a unique tree. static void len2huff(struct huff *huff, char bitlen[], int len) @@ -185,6 +264,33 @@ static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) return huff->symbol[start + offset]; } +// For deflate, TT.len = input read, TT.pos = input consumed +static void deflate(struct bitbuf *bb) +{ + char *data = TT.data; + int len, end = 0; + + TT.crc = ~0; + + // Read next half-window of data if we haven't hit EOF yet. +read_more: + len = readall(TT.fd, data + (TT.len & 32768), 32768); +fprintf(stderr, "read %d@%d\n", len, TT.pos); + if (len < 0) perror_exit("read"); // todo: add filename + if (len != 32768) end++; + TT.len += len; + + // repeat until spanked + while (TT.pos != TT.len) { + unsigned pos = TT.pos & 65535; + + if (!(pos & 32767) && !end) goto read_more; + + TT.pos++; + } +fprintf(stderr, "numberwang %d\n", TT.pos); +} + // Decompress deflated data from bitbuf to filehandle. static void inflate(struct bitbuf *bb) { @@ -216,7 +322,7 @@ static void inflate(struct bitbuf *bb) // dump bytes until done or end of current bitbuf contents if (bblen > len) bblen = len; pos = bblen; - while (pos--) outbuf_crc(*(p++)); + while (pos--) data_crc(*(p++)); bitbuf_skip(bb, bblen << 3); len -= bblen; } @@ -276,7 +382,7 @@ static void inflate(struct bitbuf *bb) int sym = huff_and_puff(bb, lithuff); // Literal? - if (sym < 256) outbuf_crc(sym); + if (sym < 256) data_crc(sym); // Copy range? else if (sym > 256) { @@ -286,9 +392,9 @@ static void inflate(struct bitbuf *bb) len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]); sym = huff_and_puff(bb, disthuff); dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]); - sym = TT.outlen & 32767; + sym = TT.pos & 32767; - while (len--) outbuf_crc(TT.outbuf[(TT.outlen-dist) & 32767]); + while (len--) data_crc(TT.data[(TT.pos-dist) & 32767]); // End of block } else break; @@ -299,18 +405,25 @@ static void inflate(struct bitbuf *bb) if (final) break; } - if (TT.outlen & 32767) { - xwrite(TT.outfd, TT.outbuf, TT.outlen & 32767); - if (TT.crcfunc) TT.crcfunc(0, TT.outlen & 32767); + if (TT.pos & 32767) { + xwrite(TT.fd, TT.data, TT.pos & 32767); + if (TT.crcfunc) TT.crcfunc(0, TT.pos & 32767); } } -static void init_deflate(void) +static void init_deflate(int compress) { - int i, n = 1; + int i, n = 1, size = 32768; // Ye olde deflate window - TT.outbuf = xmalloc(32768); + TT.data = xmalloc(32768*(compress+1)); + if (compress) { + TT.head = (unsigned short *)(TT.data+65536); + TT.chain = TT.head + + +HASH_SIZE = 1 << 15; + + } // Calculate lenbits, lenbase, distbits, distbase *TT.lenbase = 3; @@ -365,17 +478,22 @@ void gzip_crc(char *data, int len) unsigned crc, *crc_table = (unsigned *)(toybuf+sizeof(toybuf)-1024); crc = TT.crc; - for (i=0; i>8); + for (i=0; i>8); TT.crc = crc; TT.len += len; } +static void do_compress(int fd, char *name) +{ + xwrite(1, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10); +} + static void do_zcat(int fd, char *name) { struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf)); if (!is_gzip(bb)) error_exit("not gzip"); - TT.outfd = 1; + TT.fd = 1; // Use last 1k of toybuf for little endian crc table crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1); @@ -398,13 +516,20 @@ void compress_main(void) zcat_main(); } -//#define CLEANUP_compress -//#define FOR_zcat -//#include "generated/flags.h" +#define CLEANUP_compress +#define FOR_zcat +#include "generated/flags.h" void zcat_main(void) { - init_deflate(); + init_deflate(0); loopfiles(toys.optargs, do_zcat); } + +//void gunzip_main(void) +//{ +// init_deflate(0); +// +// loopfiles(toys.optargs, do_zcat); +//} -- cgit v1.2.3