diff options
-rw-r--r-- | lib/deflate.c (renamed from toys/pending/compress.c) | 234 |
1 files changed, 103 insertions, 131 deletions
diff --git a/toys/pending/compress.c b/lib/deflate.c index bc69a035..2a6274e7 100644 --- a/toys/pending/compress.c +++ b/lib/deflate.c @@ -1,72 +1,34 @@ -/* compress.c - deflate/inflate code for zip, gzip, zlib, and raw +/* deflate.c - deflate/inflate code for gzip and friends * * Copyright 2014 Rob Landley <rob@landley.net> * - * The inflate/deflate code lives here, so the various things that use it - * either live here or call these commands to pipe data through them. - * - * Divergence from posix: replace obsolete/patented "compress" with mutiplexer. - * (gzip already replaces "uncompress".) - * * See RFCs 1950 (zlib), 1951 (deflate), and 1952 (gzip) * LSB 4.1 has gzip, gunzip, and zcat + * * TODO: zip -d DIR -x LIST -list -quiet -no overwrite -overwrite -p to stdout + */ -// Accept many different kinds of command line argument. -// Leave Lrg at end so flag values line up. - -USE_COMPRESS(NEWTOY(compress, "zcd9lrg[-cd][!zgLr]", TOYFLAG_USR|TOYFLAG_BIN)) - -//zip unzip gzip gunzip zcat - -config COMPRESS - bool "compress" - default n - help - usage: compress [-zgLR19] [FILE] - - Compress or decompress file (or stdin) using "deflate" algorithm. - - -1 min compression - -9 max compression (default) - -g gzip (default) - -L zlib - -R raw - -z zip - -config DECOMPRESS - bool "decompress" - default n - help - usage: compress [-zglrcd9] [FILE] - - Compress or decompress file (or stdin) using "deflate" algorithm. - - -c compress with -g gzip (default) -l zlib -r raw -z zip - -d decompress (autodetects type) -*/ - -#define FOR_compress #include "toys.h" -GLOBALS( +struct deflate { // Huffman codes: base offset and extra bits tables (length and distance) char lenbits[29], distbits[30]; unsigned short lenbase[29], distbase[30]; void *fixdisthuff, *fixlithuff; // CRC - void (*crcfunc)(char *data, int len); - unsigned crc; + void (*crcfunc)(struct deflate *dd, char *data, int len); + unsigned crctable[256], crc; - // Compressed data buffer - char *data; - unsigned pos, len; - int infd, outfd; // Tables only used for deflation unsigned short *hashhead, *hashchain; -) + + // Compressed data buffer (extra space malloced at end) + unsigned pos, len; + int infd, outfd; + char data[]; +}; // little endian bit buffer struct bitbuf { @@ -86,6 +48,7 @@ struct bitbuf *bitbuf_init(int fd, int size) } // Advance bitpos without the overhead of recording bits +// Loads more data when input buffer empty void bitbuf_skip(struct bitbuf *bb, int bits) { int pos = bb->bitpos + bits, len = bb->len << 3; @@ -139,7 +102,7 @@ void bitbuf_flush(struct bitbuf *bb) { if (!bb->bitpos) return; - xwrite(bb->fd, bb->buf, (bb->bitpos+7)/8); + xwrite(bb->fd, bb->buf, (bb->bitpos+7)>>3); memset(bb->buf, 0, bb->max); bb->bitpos = 0; } @@ -164,25 +127,26 @@ void bitbuf_put(struct bitbuf *bb, int data, int len) } } -static void output_byte(char sym) +static void output_byte(struct deflate *dd, char sym) { - int pos = TT.pos++ & 32767; + int pos = dd->pos++ & 32767; - TT.data[pos] = sym; + dd->data[pos] = sym; if (pos == 32767) { - xwrite(TT.outfd, TT.data, 32768); - if (TT.crcfunc) TT.crcfunc(TT.data, 32768); + xwrite(dd->outfd, dd->data, 32768); + if (dd->crcfunc) dd->crcfunc(dd, dd->data, 32768); } } // Huffman coding uses bits to traverse a binary tree to a leaf node, // By placing frequently occurring symbols at shorter paths, frequently // used symbols may be represented in fewer bits than uncommon symbols. +// (length[0] isn't used but code's clearer if it's there.) struct huff { - unsigned short length[16]; - unsigned short symbol[288]; + unsigned short length[16]; // How many symbols have this bit length? + unsigned short symbol[288]; // sorted by bit length, then ascending order }; // Create simple huffman tree from array of bit lengths. @@ -199,10 +163,10 @@ static void len2huff(struct huff *huff, char bitlen[], int len) memset(huff, 0, sizeof(struct huff)); for (i = 0; i<len; i++) huff->length[bitlen[i]]++; - // Sort symbols by bit length. (They'll remain sorted by symbol within that.) + // Sort symbols by bit length, then symbol. Get list of starting positions + // for each group, then write each symbol to next position within its group. *huff->length = *offset = 0; for (i = 1; i<16; i++) offset[i] = offset[i-1] + huff->length[i-1]; - for (i = 0; i<len; i++) if (bitlen[i]) huff->symbol[offset[bitlen[i]]++] = i; } @@ -226,10 +190,10 @@ static unsigned huff_and_puff(struct bitbuf *bb, struct huff *huff) return huff->symbol[start + offset]; } -// Decompress deflated data from bitbuf to TT.outfd. -static void inflate(struct bitbuf *bb) +// Decompress deflated data from bitbuf to dd->outfd. +static void inflate(struct deflate *dd, struct bitbuf *bb) { - TT.crc = ~0; + dd->crc = ~0; // repeat until spanked for (;;) { int final, type; @@ -257,7 +221,7 @@ static void inflate(struct bitbuf *bb) // dump bytes until done or end of current bitbuf contents if (bblen > len) bblen = len; pos = bblen; - while (pos--) output_byte(*(p++)); + while (pos--) output_byte(dd, *(p++)); bitbuf_skip(bb, bblen << 3); len -= bblen; } @@ -308,8 +272,8 @@ static void inflate(struct bitbuf *bb) // Static huffman codes } else { - lithuff = TT.fixlithuff; - disthuff = TT.fixdisthuff; + lithuff = dd->fixlithuff; + disthuff = dd->fixdisthuff; } // Use huffman tables to decode block of compressed symbols @@ -317,19 +281,19 @@ static void inflate(struct bitbuf *bb) int sym = huff_and_puff(bb, lithuff); // Literal? - if (sym < 256) output_byte(sym); + if (sym < 256) output_byte(dd, sym); // Copy range? else if (sym > 256) { int len, dist; sym -= 257; - len = TT.lenbase[sym] + bitbuf_get(bb, TT.lenbits[sym]); + len = dd->lenbase[sym] + bitbuf_get(bb, dd->lenbits[sym]); sym = huff_and_puff(bb, disthuff); - dist = TT.distbase[sym] + bitbuf_get(bb, TT.distbits[sym]); - sym = TT.pos & 32767; + dist = dd->distbase[sym] + bitbuf_get(bb, dd->distbits[sym]); + sym = dd->pos & 32767; - while (len--) output_byte(TT.data[(TT.pos-dist) & 32767]); + while (len--) output_byte(dd, dd->data[(dd->pos-dist) & 32767]); // End of block } else break; @@ -340,28 +304,28 @@ static void inflate(struct bitbuf *bb) if (final) break; } - if (TT.pos & 32767) { - xwrite(TT.outfd, TT.data, TT.pos & 32767); - if (TT.crcfunc) TT.crcfunc(TT.data, TT.pos & 32767); + if (dd->pos & 32767) { + xwrite(dd->outfd, dd->data, dd->pos&32767); + if (dd->crcfunc) dd->crcfunc(dd, dd->data, dd->pos&32767); } } -// Deflate from TT.infd to bitbuf -// For deflate, TT.len = input read, TT.pos = input consumed -static void deflate(struct bitbuf *bb) +// Deflate from dd->infd to bitbuf +// For deflate, dd->len = input read, dd->pos = input consumed +static void deflate(struct deflate *dd, struct bitbuf *bb) { - char *data = TT.data; + char *data = dd->data; int len, final = 0; - TT.crc = ~0; + dd->crc = ~0; while (!final) { // Read next half-window of data if we haven't hit EOF yet. - len = readall(TT.infd, data+(TT.len&32768), 32768); + len = readall(dd->infd, data+(dd->len&32768), 32768); if (len < 0) perror_exit("read"); // todo: add filename if (len != 32768) final++; - if (TT.crcfunc) TT.crcfunc(data+(TT.len&32768), len); - // TT.len += len; crcfunc advances len + if (dd->crcfunc) dd->crcfunc(dd, data+(dd->len&32768), len); + // dd->len += len; crcfunc advances len TODO // store block as literal bitbuf_put(bb, final, 1); @@ -372,57 +336,60 @@ static void deflate(struct bitbuf *bb) bitbuf_put(bb, 0xffff & ~len, 16); // repeat until spanked - while (TT.pos != TT.len) { - unsigned pos = TT.pos & 65535; + while (dd->pos != dd->len) { + unsigned pos = dd->pos&65535; bitbuf_put(bb, data[pos], 8); // need to refill buffer? - if (!(32767 & ++TT.pos) && !final) break; + if (!(32767 & ++dd->pos) && !final) break; } } bitbuf_flush(bb); } // Allocate memory for deflate/inflate. -static void init_deflate(int compress) +static struct deflate *init_deflate(int compress) { int i, n = 1; + struct deflate *dd = xmalloc(sizeof(struct deflate)+32768*(compress ? 4 : 1)); - // compress needs 64k data and 32k each for hashhead and hashchain. - // decompress just needs 32k data. - TT.data = xmalloc(32768*(compress ? 4 : 1)); +// TODO sizeof and order of these? + // decompress needs 32k history, compress adds 64k hashhead and 32k hashchain if (compress) { - TT.hashhead = (unsigned short *)(TT.data + 65536); - TT.hashchain = (unsigned short *)(TT.data + 65536 + 32768); + dd->hashhead = (unsigned short *)(dd->data+65536); + dd->hashchain = (unsigned short *)(dd->data+65536+32768); } // Calculate lenbits, lenbase, distbits, distbase - *TT.lenbase = 3; - for (i = 0; i<sizeof(TT.lenbits)-1; i++) { + *dd->lenbase = 3; + for (i = 0; i<sizeof(dd->lenbits)-1; i++) { if (i>4) { if (!(i&3)) { - TT.lenbits[i]++; + dd->lenbits[i]++; n <<= 1; } if (i == 27) n--; - else TT.lenbits[i+1] = TT.lenbits[i]; + else dd->lenbits[i+1] = dd->lenbits[i]; } - TT.lenbase[i+1] = n + TT.lenbase[i]; + dd->lenbase[i+1] = n + dd->lenbase[i]; } n = 0; - for (i = 0; i<sizeof(TT.distbits); i++) { - TT.distbase[i] = 1<<n; - if (i) TT.distbase[i] += TT.distbase[i-1]; + for (i = 0; i<sizeof(dd->distbits); i++) { + dd->distbase[i] = 1<<n; + if (i) dd->distbase[i] += dd->distbase[i-1]; if (i>3 && !(i&1)) n++; - TT.distbits[i] = n; + dd->distbits[i] = n; } +// TODO layout and lifetime of this? // Init fixed huffman tables for (i=0; i<288; i++) toybuf[i] = 8 + (i>143) - ((i>255)<<1) + (i>279); - len2huff(TT.fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288); + len2huff(dd->fixlithuff = ((struct huff *)toybuf)+3, toybuf, 288); memset(toybuf, 5, 30); - len2huff(TT.fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30); + len2huff(dd->fixdisthuff = ((struct huff *)toybuf)+4, toybuf, 30); + + return dd; } // Return true/false whether we consumed a gzip header. @@ -444,70 +411,75 @@ static int is_gzip(struct bitbuf *bb) return 1; } -void gzip_crc(char *data, int len) +void gzip_crc(struct deflate *dd, char *data, int len) { int i; - unsigned crc, *crc_table = (unsigned *)(toybuf+sizeof(toybuf)-1024); + unsigned crc, *crc_table = dd->crctable; - crc = TT.crc; + crc = dd->crc; for (i=0; i<len; i++) crc = crc_table[(crc^data[i])&0xff] ^ (crc>>8); - TT.crc = crc; - TT.len += len; + dd->crc = crc; + dd->len += len; } -static void do_gzip(int fd, char *name) +long long gzip_fd(int infd, int outfd) { - struct bitbuf *bb = bitbuf_init(1, sizeof(toybuf)); + struct bitbuf *bb = bitbuf_init(outfd, 4096); + struct deflate *dd = init_deflate(1); + long long rc; // Header from RFC 1952 section 2.2: // 2 ID bytes (1F, 8b), gzip method byte (8=deflate), FLAG byte (none), // 4 byte MTIME (zeroed), Extra Flags (2=maximum compression), // Operating System (FF=unknown) - TT.infd = fd; + dd->infd = infd; xwrite(bb->fd, "\x1f\x8b\x08\0\0\0\0\0\x02\xff", 10); - // Use last 1k of toybuf for little endian crc table - crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1); - TT.crcfunc = gzip_crc; + // Little endian crc table + crc_init(dd->crctable, 1); + dd->crcfunc = gzip_crc; - deflate(bb); + deflate(dd, bb); // tail: crc32, len32 bitbuf_put(bb, 0, (8-bb->bitpos)&7); - bitbuf_put(bb, ~TT.crc, 32); - bitbuf_put(bb, TT.len, 32); + bitbuf_put(bb, ~dd->crc, 32); + bitbuf_put(bb, dd->len, 32); + rc = dd->len; bitbuf_flush(bb); free(bb); + free(dd); + + return rc; } -static void do_zcat(int fd, char *name) +long long gunzip_fd(int infd, int outfd) { - struct bitbuf *bb = bitbuf_init(fd, sizeof(toybuf)); + struct bitbuf *bb = bitbuf_init(infd, 4096); + struct deflate *dd = init_deflate(0); + long long rc; if (!is_gzip(bb)) error_exit("not gzip"); - TT.outfd = 1; + dd->outfd = outfd; - // Use last 1k of toybuf for little endian crc table - crc_init((unsigned *)(toybuf+sizeof(toybuf)-1024), 1); - TT.crcfunc = gzip_crc; + // Little endian crc table + crc_init(dd->crctable, 1); + dd->crcfunc = gzip_crc; - inflate(bb); + inflate(dd, bb); // tail: crc32, len32 bitbuf_skip(bb, (8-bb->bitpos)&7); - if (~TT.crc != bitbuf_get(bb, 32) || TT.len != bitbuf_get(bb, 32)) + if (~dd->crc != bitbuf_get(bb, 32) || dd->len != bitbuf_get(bb, 32)) error_exit("bad crc"); - free(bb); -} -// Parse many different kinds of command line argument: + rc = dd->len; + free(bb); + free(dd); -void compress_main(void) -{ - // todo: this - printf("hello world"); + return rc; } |